Commit ebe62e6

bench_tools: run and compare benchmark
1 parent 7e56c55 commit ebe62e6

4 files changed: 193 additions, 1 deletion
crates/bench_tools/src/comparison.rs (new file)

Lines changed: 112 additions & 0 deletions
@@ -0,0 +1,112 @@
use std::fs;
use std::path::PathBuf;

use crate::types::estimates::Estimates;

/// Result of a benchmark comparison.
#[derive(Debug)]
pub struct BenchmarkComparison {
    pub name: String,
    pub change_percentage: f64,
    pub exceeds_limit: bool,
}

/// Loads change estimates from criterion's change directory for a given benchmark.
/// Panics if the change file doesn't exist.
fn load_change_estimates(bench_name: &str) -> Estimates {
    let change_path =
        PathBuf::from("target/criterion").join(bench_name).join("change/estimates.json");

    if !change_path.exists() {
        panic!(
            "Change file not found for benchmark '{}': {}\nThis likely means no baseline exists. \
             Run the benchmark at least once before using run-and-compare.",
            bench_name,
            change_path.display()
        );
    }

    let data = fs::read_to_string(&change_path)
        .unwrap_or_else(|e| panic!("Failed to read {}: {}", change_path.display(), e));

    serde_json::from_str(&data).unwrap_or_else(|e| {
        panic!("Failed to deserialize {}: {}\nContent: {}", change_path.display(), e, data)
    })
}

/// Converts change estimates to percentage.
/// The mean.point_estimate in change/estimates.json represents fractional change
/// (e.g., 0.0706 = 7.06% change).
fn get_regression_percentage(change_estimates: &Estimates) -> f64 {
    change_estimates.mean.point_estimate * 100.0
}

/// Checks all benchmarks for regressions against a specified limit.
/// Returns a vector of comparison results for all benchmarks.
/// If any benchmark exceeds the regression limit, returns an error with detailed results.
/// Panics if change file is not found for any benchmark.
pub fn check_regressions(
    bench_names: &[&str],
    regression_limit: f64,
) -> Result<Vec<BenchmarkComparison>, (String, Vec<BenchmarkComparison>)> {
    let mut results = Vec::new();
    let mut exceeded_count = 0;

    for bench_name in bench_names {
        let change_estimates = load_change_estimates(bench_name);
        let change_percentage = get_regression_percentage(&change_estimates);
        let exceeds_limit = change_percentage > regression_limit;

        if exceeds_limit {
            exceeded_count += 1;
        }

        results.push(BenchmarkComparison {
            name: bench_name.to_string(),
            change_percentage,
            exceeds_limit,
        });
    }

    if exceeded_count > 0 {
        let error_msg = format!("{} benchmark(s) exceeded regression threshold!", exceeded_count);
        Err((error_msg, results))
    } else {
        Ok(results)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_get_regression_percentage() {
        let estimates = Estimates {
            mean: crate::types::estimates::Stat {
                point_estimate: 0.0706,
                standard_error: 0.01,
                confidence_interval: crate::types::estimates::ConfidenceInterval {
                    confidence_level: 0.95,
                    lower_bound: 0.05,
                    upper_bound: 0.09,
                },
            },
            median: crate::types::estimates::Stat {
                point_estimate: 0.03,
                standard_error: 0.01,
                confidence_interval: crate::types::estimates::ConfidenceInterval {
                    confidence_level: 0.95,
                    lower_bound: 0.01,
                    upper_bound: 0.05,
                },
            },
            std_dev: None,
            median_abs_dev: None,
            slope: None,
        };

        let percentage = get_regression_percentage(&estimates);
        assert!((percentage - 7.06).abs() < 0.01);
    }
}
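
For context on the module above: check_regressions reads target/criterion/<bench>/change/estimates.json (written by criterion once a previous run exists as a baseline) and interprets mean.point_estimate as fractional change, so 0.0706 becomes 7.06%. Since lib.rs below exports the module publicly, it can also be driven outside the runner. A minimal sketch of such a standalone caller follows; the benchmark names, the 5.0% limit, and the main() wrapper are purely illustrative and not part of this commit:

// Hypothetical standalone caller of the comparison module (illustrative only).
fn main() {
    // Placeholder criterion benchmark names; in practice these come from the package's configs.
    let bench_names = ["example_bench_a", "example_bench_b"];
    match bench_tools::comparison::check_regressions(&bench_names, 5.0) {
        Ok(results) => {
            println!("{} benchmark(s) within the 5.0% limit", results.len());
        }
        Err((msg, results)) => {
            // Print only the offenders before failing; runner.rs below prints all results.
            for r in results.iter().filter(|r| r.exceeds_limit) {
                eprintln!("{}: {:+.2}%", r.name, r.change_percentage);
            }
            panic!("{}", msg);
        }
    }
}

Returning Err((String, Vec<BenchmarkComparison>)) rather than panicking inside check_regressions is what lets callers print a full per-benchmark breakdown before aborting, which is exactly how run_and_compare_benchmarks in runner.rs uses it.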

crates/bench_tools/src/lib.rs

Lines changed: 1 addition & 0 deletions
@@ -1,5 +1,6 @@
 #[cfg(test)]
 pub(crate) mod benches;
+pub mod comparison;
 pub mod gcs;
 pub mod runner;
 pub mod types;

crates/bench_tools/src/main.rs

Lines changed: 31 additions & 0 deletions
@@ -30,6 +30,22 @@ enum Commands {
         #[arg(long)]
         input_dir: Option<String>,
     },
+    /// Run benchmarks, compare to previous run, and fail if regression exceeds limit.
+    RunAndCompare {
+        /// Package name to run benchmarks for.
+        #[arg(short, long)]
+        package: String,
+        /// Output directory for results.
+        #[arg(short, long)]
+        out: String,
+        /// Optional: Local directory containing input files. If not provided, inputs will be
+        /// downloaded from GCS for benchmarks that require them.
+        #[arg(long)]
+        input_dir: Option<String>,
+        /// Maximum acceptable regression percentage (e.g., 5.0 for 5%).
+        #[arg(long)]
+        regression_limit: f64,
+    },
     /// List benchmarks for a package.
     List {
         /// Package name to list benchmarks for. If not provided, lists all benchmarks.
@@ -60,6 +76,21 @@ async fn main() {
 
             bench_tools::runner::run_benchmarks(&benchmarks, input_dir.as_deref(), &out).await;
         }
+        Commands::RunAndCompare { package, out, input_dir, regression_limit } => {
+            let benchmarks = find_benchmarks_by_package(&package);
+
+            if benchmarks.is_empty() {
+                panic!("No benchmarks found for package: {}", package);
+            }
+
+            bench_tools::runner::run_and_compare_benchmarks(
+                &benchmarks,
+                input_dir.as_deref(),
+                &out,
+                regression_limit,
+            )
+            .await;
+        }
         Commands::List { package } => match package {
             Some(package_name) => {
                 let benchmarks = find_benchmarks_by_package(&package_name);
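
Given the clap derive above, the new subcommand would presumably be invoked as:

    bench_tools run-and-compare --package <package> --out <dir> --regression-limit 5.0

where the binary name is assumed from the crate name, --input-dir remains optional, and the kebab-case subcommand and flag names follow clap's default derive renaming ("run-and-compare" also matches the hint in the comparison module's panic message).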

crates/bench_tools/src/runner.rs

Lines changed: 49 additions & 1 deletion
@@ -43,7 +43,11 @@ async fn prepare_inputs(bench: &BenchmarkConfig, input_dir: Option<&str>) {
     } else {
         gcs::download_inputs(bench.name, &benchmark_input_dir).await;
         if !benchmark_input_dir.exists() {
-            panic!("Failed to download inputs for {}: {}", bench.name, benchmark_input_dir.display());
+            panic!(
+                "Failed to download inputs for {}: {}",
+                bench.name,
+                benchmark_input_dir.display()
+            );
         }
     }
 }
@@ -116,3 +120,47 @@ pub async fn run_benchmarks(
 
     println!("\n✓ All benchmarks completed! Results saved to: {}", output_dir);
 }
+
+/// Runs benchmarks and compares them against previous results, failing if regression exceeds limit.
+pub async fn run_and_compare_benchmarks(
+    benchmarks: &[&BenchmarkConfig],
+    input_dir: Option<&str>,
+    output_dir: &str,
+    regression_limit: f64,
+) {
+    // Run benchmarks first.
+    run_benchmarks(benchmarks, input_dir, output_dir).await;
+
+    // Collect all criterion benchmark names from configs.
+    let mut bench_names = Vec::new();
+    for bench in benchmarks {
+        bench_names.extend(bench.criterion_benchmark_names.unwrap_or(&[bench.name]));
+    }
+
+    println!("\n📊 Checking for performance regressions (limit: {}%):", regression_limit);
+    let regression_result = crate::comparison::check_regressions(&bench_names, regression_limit);
+
+    match regression_result {
+        Ok(_) => {
+            println!("\n✅ All benchmarks passed regression check!");
+        }
+        Err((error_msg, results)) => {
+            // Some benchmarks exceeded the limit - print detailed results.
+            println!("\nBenchmark Results:");
+            for result in results {
+                if result.exceeds_limit {
+                    println!(
+                        " ❌ {}: {:+.2}% (EXCEEDS {:.1}% limit)",
+                        result.name, result.change_percentage, regression_limit
+                    );
+                } else {
+                    println!(
+                        " ✓ {}: {:+.2}% (within {:.1}% limit)",
+                        result.name, result.change_percentage, regression_limit
+                    );
+                }
+            }
+            panic!("\n{}", error_msg);
+        }
+    }
+}
