ci: deserialize cargo bench result (#9624)

AvivYossef-starkware · web-flow · commit 2a4cd4dbfc20 · 2025-10-23T10:00:06.000Z
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/bench_tools/Cargo.toml b/crates/bench_tools/Cargo.toml
@@ -8,3 +8,16 @@ workspace = true
 
 [dependencies]
 clap = { workspace = true, features = ["derive"] }
+criterion.workspace = true
+serde = { workspace = true, features = ["derive"] }
+
+[dev-dependencies]
+apollo_infra_utils.workspace = true
+glob.workspace = true
+rstest.workspace = true
+serde_json.workspace = true
+
+[[bench]]
+harness = false
+name = "dummy_bench"
+path = "src/benches/dummy_bench.rs"
diff --git a/crates/bench_tools/data/dummy_benches_result/dummy_sum_1000_estimates.json b/crates/bench_tools/data/dummy_benches_result/dummy_sum_1000_estimates.json
@@ -0,0 +1,47 @@
+{
+  "mean": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.4627450463077952,
+      "upper_bound": 0.4672721018359347
+    },
+    "point_estimate": 0.464843138908376,
+    "standard_error": 0.001159579008842323
+  },
+  "median": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.46018939259554725,
+      "upper_bound": 0.4630457711597157
+    },
+    "point_estimate": 0.46110773204298744,
+    "standard_error": 0.0007700792406655315
+  },
+  "median_abs_dev": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.004389190374631315,
+      "upper_bound": 0.008068153741443406
+    },
+    "point_estimate": 0.0057180067999875826,
+    "standard_error": 0.0008869433017250109
+  },
+  "slope": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.4613537171491014,
+      "upper_bound": 0.4679905568783797
+    },
+    "point_estimate": 0.4641046136274776,
+    "standard_error": 0.001730736547763549
+  },
+  "std_dev": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.007242933241527838,
+      "upper_bound": 0.01557482754536567
+    },
+    "point_estimate": 0.01165848802634422,
+    "standard_error": 0.00216291128423961
+  }
+}
diff --git a/crates/bench_tools/data/dummy_benches_result/dummy_sum_100_estimates.json b/crates/bench_tools/data/dummy_benches_result/dummy_sum_100_estimates.json
@@ -0,0 +1,47 @@
+{
+  "mean": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.45957162353881537,
+      "upper_bound": 0.47252808549400704
+    },
+    "point_estimate": 0.4656174818033215,
+    "standard_error": 0.0033071130641006297
+  },
+  "median": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.45151445843970833,
+      "upper_bound": 0.4538904649825635
+    },
+    "point_estimate": 0.4526323188039608,
+    "standard_error": 0.0006992971024409143
+  },
+  "median_abs_dev": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.005240482428601258,
+      "upper_bound": 0.010332193401481504
+    },
+    "point_estimate": 0.007727865411134973,
+    "standard_error": 0.001301158952978215
+  },
+  "slope": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.455400693078925,
+      "upper_bound": 0.46369875851255066
+    },
+    "point_estimate": 0.45933814542565166,
+    "standard_error": 0.002119888457921763
+  },
+  "std_dev": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.02332892198500483,
+      "upper_bound": 0.041110549300589966
+    },
+    "point_estimate": 0.03317521516535013,
+    "standard_error": 0.004530135795169152
+  }
+}
diff --git a/crates/bench_tools/src/benches.rs b/crates/bench_tools/src/benches.rs
@@ -0,0 +1,2 @@
+#[cfg(test)]
+pub(crate) mod dummy_bench;
diff --git a/crates/bench_tools/src/benches/dummy_bench.rs b/crates/bench_tools/src/benches/dummy_bench.rs
@@ -0,0 +1,23 @@
+use std::hint::black_box;
+
+use criterion::{criterion_group, criterion_main, Criterion};
+
+/// Returns the sum of all integers in the half-open range `0..n`.
+///
+/// Serves as the workload timed by the dummy benchmarks in this file.
+#[allow(dead_code)]
+fn dummy_function(n: u64) -> u64 {
+    // Simple function that does some work
+    (0..n).sum()
+}
+
+/// Example benchmark function that demonstrates how to use Criterion for benchmarking.
+///
+/// Registers two benchmarks, `dummy_sum_100` and `dummy_sum_1000`, each timing
+/// `dummy_function` on the corresponding input. This is used to test the benchmarking
+/// infrastructure and generate sample benchmark results that can be parsed by the bench_tools
+/// framework.
+#[allow(dead_code)]
+fn dummy_benchmark(c: &mut Criterion) {
+    // The input is passed through `black_box` so the compiler cannot treat it as a compile-time
+    // constant and fold the whole call away; the output is passed through `black_box` so the
+    // result counts as used and the call is not eliminated.
+    c.bench_function("dummy_sum_100", |b| b.iter(|| black_box(dummy_function(black_box(100)))));
+
+    c.bench_function("dummy_sum_1000", |b| b.iter(|| black_box(dummy_function(black_box(1000)))));
+}
+
+// Collect the benchmark functions into the `benches` group and generate the entry point that
+// runs it (this bench target is declared with `harness = false` in the crate's Cargo.toml).
+criterion_group!(benches, dummy_benchmark);
+criterion_main!(benches);
diff --git a/crates/bench_tools/src/lib.rs b/crates/bench_tools/src/lib.rs
@@ -0,0 +1,3 @@
+#[cfg(test)]
+pub(crate) mod benches;
+pub mod types;
diff --git a/crates/bench_tools/src/types.rs b/crates/bench_tools/src/types.rs
@@ -0,0 +1,3 @@
+pub mod estimates;
+#[cfg(test)]
+mod estimates_test;
diff --git a/crates/bench_tools/src/types/estimates.rs b/crates/bench_tools/src/types/estimates.rs
@@ -0,0 +1,30 @@
+use serde::Deserialize;
+
+/// Criterion benchmark estimates.
+///
+/// Mirrors the top-level object of an `estimates.json` file (see the samples under
+/// `data/dummy_benches_result`); each field is deserialized from the JSON key of the same name.
+#[derive(Debug, Deserialize)]
+#[allow(dead_code)]
+pub struct Estimates {
+    /// Estimate of the mean.
+    pub mean: Stat,
+    /// Estimate of the median.
+    pub median: Stat,
+    /// Estimate of the standard deviation.
+    pub std_dev: Stat,
+    /// Estimate of the median absolute deviation.
+    pub median_abs_dev: Stat,
+    /// Estimate of the slope; deserializes to `None` when the key is absent or `null`.
+    // NOTE(review): presumably optional because Criterion does not always emit a slope — confirm.
+    pub slope: Option<Stat>,
+}
+
+/// Statistical estimate with confidence interval.
+#[derive(Debug, Deserialize)]
+#[allow(dead_code)]
+pub struct Stat {
+    /// Point estimate of the statistic.
+    // NOTE(review): presumably expressed in nanoseconds, as reported by Criterion — confirm.
+    pub point_estimate: f64,
+    /// Standard error of the point estimate.
+    pub standard_error: f64,
+    /// Confidence interval reported alongside the point estimate.
+    pub confidence_interval: ConfidenceInterval,
+}
+
+/// Confidence interval bounds.
+#[derive(Debug, Deserialize)]
+#[allow(dead_code)]
+pub struct ConfidenceInterval {
+    /// Confidence level of the interval (`0.95` in the sample data files).
+    pub confidence_level: f64,
+    /// Lower bound of the interval.
+    pub lower_bound: f64,
+    /// Upper bound of the interval.
+    pub upper_bound: f64,
+}
diff --git a/crates/bench_tools/src/types/estimates_test.rs b/crates/bench_tools/src/types/estimates_test.rs
@@ -0,0 +1,88 @@
+use std::fs;
+use std::path::{Path, PathBuf};
+use std::process::Command;
+
+use apollo_infra_utils::path::project_path;
+use rstest::{fixture, rstest};
+
+use crate::types::estimates::Estimates;
+
+/// Returns the bench_tools crate directory.
+///
+/// The directory is taken from the `CARGO_MANIFEST_DIR` environment variable, read at run time.
+/// When the variable cannot be read, the current working directory is used instead.
+///
+/// # Panics
+///
+/// Panics if the fallback is needed and the current working directory cannot be determined.
+#[fixture]
+fn bench_tools_crate_dir() -> PathBuf {
+    match std::env::var("CARGO_MANIFEST_DIR") {
+        Ok(manifest_dir) => PathBuf::from(manifest_dir),
+        Err(_) => std::env::current_dir().unwrap(),
+    }
+}
+
+/// Returns the directory holding the saved dummy benchmark estimate files, i.e.
+/// `data/dummy_benches_result` inside the bench_tools crate directory.
+#[fixture]
+fn dummy_bench_results_dir(bench_tools_crate_dir: PathBuf) -> PathBuf {
+    // Extend the owned crate path in place rather than building a second `PathBuf`.
+    let mut results_dir = bench_tools_crate_dir;
+    results_dir.push("data/dummy_benches_result");
+    results_dir
+}
+
+/// Returns the workspace root directory, as reported by `project_path`.
+///
+/// # Panics
+///
+/// Panics with the message "Failed to get project path" if `project_path` does not yield a
+/// path.
+#[fixture]
+fn workspace_root() -> PathBuf {
+    project_path().expect("Failed to get project path")
+}
+
+/// Returns the names of the dummy benchmarks.
+///
+/// These match the names registered in `dummy_bench.rs` and are used to build the
+/// `<name>_estimates.json` file names.
+#[fixture]
+fn dummy_bench_names() -> &'static [&'static str] {
+    static NAMES: [&str; 2] = ["dummy_sum_100", "dummy_sum_1000"];
+    &NAMES
+}
+
+/// Checks that every dummy benchmark estimates file in `results_dir` parses as [`Estimates`].
+///
+/// For each name in `bench_names`, the file `<name>_estimates.json` is read from `results_dir`
+/// and deserialized.
+///
+/// # Panics
+///
+/// Panics if a file cannot be read or its content cannot be deserialized; the message names
+/// the offending path and, for deserialization failures, includes the file content.
+fn assert_deserialize_dummy_bench_estimates(results_dir: &Path, bench_names: &[&str]) {
+    for name in bench_names {
+        let estimates_file = results_dir.join(format!("{}_estimates.json", name));
+
+        let contents = match fs::read_to_string(&estimates_file) {
+            Ok(contents) => contents,
+            Err(err) => panic!("Failed to read {}: {}", estimates_file.display(), err),
+        };
+
+        // Only the success of the parse matters; the parsed value itself is discarded.
+        if let Err(err) = serde_json::from_str::<Estimates>(&contents) {
+            panic!(
+                "Failed to deserialize {}: {}\nContent: {}",
+                estimates_file.display(),
+                err,
+                contents
+            );
+        }
+    }
+}
+
+/// Run dummy benchmark and deserialize the results.
+///
+/// Marked `#[ignore]`, so it only runs when ignored tests are explicitly requested. It
+/// overwrites the estimate files in `dummy_bench_results_dir` with freshly generated ones.
+///
+/// # Panics
+///
+/// Panics if `cargo bench` cannot be spawned or exits unsuccessfully, if any estimates file
+/// cannot be read, parsed, or written, or if a saved file fails to deserialize.
+#[rstest]
+#[ignore]
+fn run_dummy_bench_and_deserialize_estimates(
+    workspace_root: PathBuf,
+    dummy_bench_results_dir: PathBuf,
+    dummy_bench_names: &[&str],
+) {
+    // Step 1: run the dummy benchmark through cargo.
+    let mut bench_cmd = Command::new("cargo");
+    bench_cmd.args(["bench", "-p", "bench_tools", "--bench", "dummy_bench"]);
+    let exit_status = bench_cmd.status().expect("Failed to spawn `cargo bench`");
+    assert!(exit_status.success(), "`cargo bench` did not exit successfully");
+
+    // Step 2: copy each benchmark's estimates.json into the results directory.
+    fs::create_dir_all(&dummy_bench_results_dir).expect("Failed to create results directory");
+
+    let criterion_dir = workspace_root.join("target/criterion");
+    for name in dummy_bench_names {
+        let raw_path = criterion_dir.join(name).join("new/estimates.json");
+        let saved_path = dummy_bench_results_dir.join(format!("{}_estimates.json", name));
+
+        let raw = match fs::read_to_string(&raw_path) {
+            Ok(raw) => raw,
+            Err(err) => panic!("Failed to read {}: {}", raw_path.display(), err),
+        };
+        // Round-trip through a generic JSON value so the saved copy is pretty-printed.
+        let parsed: serde_json::Value = serde_json::from_str(&raw).expect("Failed to parse JSON");
+        let pretty = serde_json::to_string_pretty(&parsed).expect("Failed to serialize JSON");
+        fs::write(&saved_path, pretty).expect("Failed to write benchmark result");
+    }
+
+    // Step 3: make sure the files that were just saved deserialize.
+    assert_deserialize_dummy_bench_estimates(&dummy_bench_results_dir, dummy_bench_names);
+}
+
+/// Test that `Estimates` can be deserialized from the estimate files saved in the results
+/// directory.
+#[rstest]
+fn deserialize_dummy_bench_estimates(dummy_bench_results_dir: PathBuf, dummy_bench_names: &[&str]) {
+    let results_dir: &Path = dummy_bench_results_dir.as_path();
+    assert_deserialize_dummy_bench_estimates(results_dir, dummy_bench_names);
+}

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+#[cfg(test)]`
	`2`	`+pub(crate) mod dummy_bench;`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+#[cfg(test)]`
	`2`	`+pub(crate) mod benches;`
	`3`	`+pub mod types;`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+pub mod estimates;`
	`2`	`+#[cfg(test)]`
	`3`	`+mod estimates_test;`