ci: deserialize cargo bench result

AvivYossef-starkware · AvivYossef-starkware · commit bd42739ee3d6 · 2025-10-21T21:58:57.000+03:00
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/bench_tools/Cargo.toml b/crates/bench_tools/Cargo.toml
@@ -8,3 +8,15 @@ workspace = true
 
 [dependencies]
 clap = { workspace = true, features = ["derive"] }
+criterion.workspace = true
+serde = { workspace = true, features = ["derive"] }
+
+[dev-dependencies]
+glob.workspace = true
+rstest.workspace = true
+serde_json.workspace = true
+
+[[bench]]
+harness = false
+name = "dummy_bench"
+path = "src/benches/dummy_bench.rs"
diff --git a/crates/bench_tools/data/dummy_benches_result/dummy_sum_1000_estimates.json b/crates/bench_tools/data/dummy_benches_result/dummy_sum_1000_estimates.json
@@ -0,0 +1,47 @@
+{
+  "mean": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.4627450463077952,
+      "upper_bound": 0.4672721018359347
+    },
+    "point_estimate": 0.464843138908376,
+    "standard_error": 0.001159579008842323
+  },
+  "median": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.46018939259554725,
+      "upper_bound": 0.4630457711597157
+    },
+    "point_estimate": 0.46110773204298744,
+    "standard_error": 0.0007700792406655315
+  },
+  "median_abs_dev": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.004389190374631315,
+      "upper_bound": 0.008068153741443406
+    },
+    "point_estimate": 0.0057180067999875826,
+    "standard_error": 0.0008869433017250109
+  },
+  "slope": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.4613537171491014,
+      "upper_bound": 0.4679905568783797
+    },
+    "point_estimate": 0.4641046136274776,
+    "standard_error": 0.001730736547763549
+  },
+  "std_dev": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.007242933241527838,
+      "upper_bound": 0.01557482754536567
+    },
+    "point_estimate": 0.01165848802634422,
+    "standard_error": 0.00216291128423961
+  }
+}
diff --git a/crates/bench_tools/data/dummy_benches_result/dummy_sum_100_estimates.json b/crates/bench_tools/data/dummy_benches_result/dummy_sum_100_estimates.json
@@ -0,0 +1,47 @@
+{
+  "mean": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.45957162353881537,
+      "upper_bound": 0.47252808549400704
+    },
+    "point_estimate": 0.4656174818033215,
+    "standard_error": 0.0033071130641006297
+  },
+  "median": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.45151445843970833,
+      "upper_bound": 0.4538904649825635
+    },
+    "point_estimate": 0.4526323188039608,
+    "standard_error": 0.0006992971024409143
+  },
+  "median_abs_dev": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.005240482428601258,
+      "upper_bound": 0.010332193401481504
+    },
+    "point_estimate": 0.007727865411134973,
+    "standard_error": 0.001301158952978215
+  },
+  "slope": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.455400693078925,
+      "upper_bound": 0.46369875851255066
+    },
+    "point_estimate": 0.45933814542565166,
+    "standard_error": 0.002119888457921763
+  },
+  "std_dev": {
+    "confidence_interval": {
+      "confidence_level": 0.95,
+      "lower_bound": 0.02332892198500483,
+      "upper_bound": 0.041110549300589966
+    },
+    "point_estimate": 0.03317521516535013,
+    "standard_error": 0.004530135795169152
+  }
+}
diff --git a/crates/bench_tools/src/benches.rs b/crates/bench_tools/src/benches.rs
@@ -0,0 +1,2 @@
+#[cfg(test)]
+pub(crate) mod dummy_bench;
diff --git a/crates/bench_tools/src/benches/dummy_bench.rs b/crates/bench_tools/src/benches/dummy_bench.rs
@@ -0,0 +1,23 @@
+use std::hint::black_box;
+
+use criterion::{criterion_group, criterion_main, Criterion};
+
+/// Adds up every integer in the half-open range `0..n`, giving the benchmarks some work to time.
+#[allow(dead_code)]
+fn dummy_function(n: u64) -> u64 {
+    (0..n).sum::<u64>()
+}
+
+/// Example benchmark that demonstrates how to use Criterion for benchmarking.
+///
+/// Registers two benchmarks, `dummy_sum_100` and `dummy_sum_1000`, each timing
+/// `dummy_function` on the corresponding input. It is used to test the benchmarking
+/// infrastructure and to generate sample benchmark results that can be parsed by the
+/// bench_tools framework.
+#[allow(dead_code)]
+fn dummy_benchmark(c: &mut Criterion) {
+    // `black_box` on the argument hides the constant input from the optimizer so the call cannot
+    // be folded away at compile time; `black_box` on the result keeps the call from being
+    // discarded as unused.
+    c.bench_function("dummy_sum_100", |b| {
+        b.iter(|| black_box(dummy_function(black_box(100))))
+    });
+
+    c.bench_function("dummy_sum_1000", |b| {
+        b.iter(|| black_box(dummy_function(black_box(1000))))
+    });
+}
+
+// Define the `benches` group containing `dummy_benchmark` and the entry point that runs it.
+criterion_group!(benches, dummy_benchmark);
+criterion_main!(benches);
diff --git a/crates/bench_tools/src/lib.rs b/crates/bench_tools/src/lib.rs
@@ -0,0 +1,3 @@
+#[cfg(test)]
+pub(crate) mod benches;
+pub mod types;
diff --git a/crates/bench_tools/src/types.rs b/crates/bench_tools/src/types.rs
@@ -0,0 +1,3 @@
+pub mod estimates;
+#[cfg(test)]
+mod estimates_test;
diff --git a/crates/bench_tools/src/types/estimates.rs b/crates/bench_tools/src/types/estimates.rs
@@ -0,0 +1,30 @@
+use serde::Deserialize;
+
+/// Criterion benchmark estimates.
+///
+/// The fields mirror the top-level keys of a Criterion `estimates.json` file.
+/// NOTE(review): the unit of the numeric values is not stated anywhere in this crate —
+/// confirm against Criterion's documentation before interpreting them.
+#[derive(Clone, Debug, Deserialize, PartialEq)]
+#[allow(dead_code)]
+pub struct Estimates {
+    /// Statistics stored under the `mean` key.
+    pub mean: Stat,
+    /// Statistics stored under the `median` key.
+    pub median: Stat,
+    /// Statistics stored under the `std_dev` key.
+    pub std_dev: Stat,
+    /// Statistics stored under the `median_abs_dev` key.
+    pub median_abs_dev: Stat,
+    /// Statistics stored under the `slope` key; `None` when the key is absent or `null`.
+    pub slope: Option<Stat>,
+}
+
+/// Statistical estimate with confidence interval.
+#[derive(Clone, Debug, Deserialize, PartialEq)]
+#[allow(dead_code)]
+pub struct Stat {
+    /// Value of the `point_estimate` key.
+    pub point_estimate: f64,
+    /// Value of the `standard_error` key.
+    pub standard_error: f64,
+    /// Interval stored under the `confidence_interval` key.
+    pub confidence_interval: ConfidenceInterval,
+}
+
+/// Confidence interval bounds.
+#[derive(Clone, Debug, Deserialize, PartialEq)]
+#[allow(dead_code)]
+pub struct ConfidenceInterval {
+    /// Confidence level of the interval (for example `0.95`).
+    pub confidence_level: f64,
+    /// Lower bound of the interval.
+    pub lower_bound: f64,
+    /// Upper bound of the interval.
+    pub upper_bound: f64,
+}
diff --git a/crates/bench_tools/src/types/estimates_test.rs b/crates/bench_tools/src/types/estimates_test.rs
@@ -0,0 +1,110 @@
+use std::fs;
+use std::path::{Path, PathBuf};
+use std::process::Command;
+
+use rstest::{fixture, rstest};
+
+use crate::types::estimates::Estimates;
+
+/// Test fixture yielding the bench_tools crate directory.
+///
+/// Reads `CARGO_MANIFEST_DIR`; when that variable cannot be read, the current working
+/// directory is used instead.
+#[fixture]
+fn manifest_dir() -> PathBuf {
+    match std::env::var("CARGO_MANIFEST_DIR") {
+        Ok(dir) => PathBuf::from(dir),
+        Err(_) => std::env::current_dir().unwrap(),
+    }
+}
+
+/// Test fixture yielding the crate's `data/dummy_benches_result` directory.
+#[fixture]
+fn data_dir(manifest_dir: PathBuf) -> PathBuf {
+    let mut dir = manifest_dir;
+    dir.push("data/dummy_benches_result");
+    dir
+}
+
+/// Test fixture yielding the workspace root, i.e. the directory two levels above the crate.
+#[fixture]
+fn workspace_root(manifest_dir: PathBuf) -> PathBuf {
+    // `ancestors()` starts at the path itself, so index 2 is the parent's parent.
+    let root = manifest_dir.ancestors().nth(2).unwrap();
+    root.to_path_buf()
+}
+
+/// Helper that checks the dummy bench estimate JSON files in `data_dir` deserialize into
+/// [`Estimates`].
+///
+/// Looks for `dummy_sum_100_estimates.json` and `dummy_sum_1000_estimates.json`. A file that
+/// does not exist is skipped, but at least one of the two must be present.
+///
+/// # Panics
+///
+/// Panics if neither file exists, or if a file that exists cannot be read or deserialized.
+fn assert_deserialize_dummy_bench_estimates(data_dir: &Path) {
+    // Keep only the expected estimate files that are actually present.
+    let files: Vec<PathBuf> = ["dummy_sum_100", "dummy_sum_1000"]
+        .iter()
+        .map(|bench_name| data_dir.join(format!("{bench_name}_estimates.json")))
+        .filter(|path| path.exists())
+        .collect();
+
+    assert!(!files.is_empty(), "No dummy benchmark estimate files found in {}", data_dir.display());
+
+    // Deserialize each collected file; the parsed value is discarded because only successful
+    // deserialization is being checked.
+    for path in &files {
+        let data = fs::read_to_string(path)
+            .unwrap_or_else(|e| panic!("Failed to read {}: {}", path.display(), e));
+
+        let _est: Estimates = serde_json::from_str(&data).unwrap_or_else(|e| {
+            panic!("Failed to deserialize {}: {}\nContent: {}", path.display(), e, data)
+        });
+    }
+}
+
+/// Run dummy benchmark and deserialize the results.
+///
+/// Spawns `cargo bench` for the `dummy_bench` target, copies the resulting Criterion
+/// `estimates.json` files (pretty-printed) into `data_dir` as `<bench_name>_estimates.json`,
+/// and then checks that the saved files deserialize. Marked `#[ignore]`, so it only runs when
+/// ignored tests are requested explicitly.
+///
+/// # Panics
+///
+/// Panics if `cargo bench` cannot be spawned or fails, if no result file is found, or if any
+/// read, parse, or write step fails.
+#[rstest]
+#[ignore]
+fn run_dummy_bench_and_deserialize_estimates(workspace_root: PathBuf, data_dir: PathBuf) {
+    // 1) Run dummy benchmark from the workspace root, the same directory the results are
+    //    looked up under below.
+    let status = Command::new("cargo")
+        .args(["bench", "-p", "bench_tools", "--bench", "dummy_bench"])
+        .current_dir(&workspace_root)
+        .status()
+        .expect("Failed to spawn `cargo bench`");
+    assert!(status.success(), "`cargo bench` did not exit successfully");
+
+    // 2) Collect only the dummy_bench estimates.json files, keeping each benchmark's name next
+    //    to its path so the destination file name does not have to be re-derived from the path.
+    let criterion_dir = workspace_root.join("target/criterion");
+    let results: Vec<(&str, PathBuf)> = ["dummy_sum_100", "dummy_sum_1000"]
+        .iter()
+        .map(|&bench_name| {
+            let path = criterion_dir.join(bench_name).join("new/estimates.json");
+            (bench_name, path)
+        })
+        .filter(|(_, path)| path.exists())
+        .collect();
+
+    assert!(
+        !results.is_empty(),
+        "No dummy_bench results found; did the benchmark run successfully?"
+    );
+
+    // 3) Save results to bench_tools/data.
+    fs::create_dir_all(&data_dir).expect("Failed to create data directory");
+    for (bench_name, path) in &results {
+        let dest = data_dir.join(format!("{bench_name}_estimates.json"));
+
+        // Read, parse, and write the result to the data directory. Round-tripping through
+        // `serde_json::Value` stores the file pretty-printed.
+        let data = fs::read_to_string(path).expect("Failed to read benchmark result");
+        let json: serde_json::Value = serde_json::from_str(&data).expect("Failed to parse JSON");
+        let pretty_json = serde_json::to_string_pretty(&json).expect("Failed to serialize JSON");
+        fs::write(&dest, pretty_json).expect("Failed to write benchmark result");
+    }
+
+    // 4) Deserialize and validate the saved results
+    assert_deserialize_dummy_bench_estimates(&data_dir);
+}
+
+/// Checks that the estimate files saved under the data directory deserialize into `Estimates`.
+#[rstest]
+fn deserialize_dummy_bench_estimates(data_dir: PathBuf) {
+    let saved_results: &Path = data_dir.as_path();
+    assert_deserialize_dummy_bench_estimates(saved_results);
+}

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+#[cfg(test)]`
	`2`	`+pub(crate) mod dummy_bench;`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+#[cfg(test)]`
	`2`	`+pub(crate) mod benches;`
	`3`	`+pub mod types;`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+pub mod estimates;`
	`2`	`+#[cfg(test)]`
	`3`	`+mod estimates_test;`