Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,12 @@ rayon = "1.8.0"

[build-dependencies]
protobuf-codegen-pure = "2.22"

[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
rand = "0.8"

[[bench]]
name = "score_block"
path = "bench/score_block.rs"
harness = false
135 changes: 135 additions & 0 deletions bench/score_block.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use rand::prelude::*;
use rand::seq::SliceRandom;

// Import the `block_score` and `block_score_scalar` functions from the library
use bmp::index::forward_index::{block_score, block_score_scalar};

/// Synthetic query/block pair fed to the block scoring benchmarks.
///
/// Instances are built by `TestData::new`, which fills every field from a
/// fixed-seed RNG.
struct TestData {
// Query as `(term_id, weight)` pairs; weights are drawn from `1..=255`.
query: Vec<(u16, u8)>,
// One entry per term: `(term_id, (doc_ids, scores))`. The two inner vectors
// are filled in lockstep, one doc id and one score per posting.
document: Vec<(u16, (Vec<u8>, Vec<u8>))>,
// Upper bound (exclusive) used when drawing doc ids; also passed through to
// the benchmarked functions.
block_size: usize,
// Label for this data set, used when building benchmark IDs.
name: String,
}

impl TestData {
    /// Builds a deterministic synthetic query and block for benchmarking.
    ///
    /// * `query_length` - number of distinct query terms, sampled without
    ///   replacement from `0..num_terms`.
    /// * `num_terms` - number of terms in the block; every term receives a
    ///   posting list.
    /// * `avg_postings_per_term` - each posting list length is drawn uniformly
    ///   from `1..=2 * avg_postings_per_term`.
    /// * `block_size` - doc ids are drawn uniformly from `0..block_size`.
    /// * `name` - label later returned by `name()`.
    ///
    /// The RNG is seeded with a constant, so repeated calls with the same
    /// arguments produce identical data.
    ///
    /// # Panics
    ///
    /// Panics if `num_terms` exceeds the `u16` term-id range, if `block_size`
    /// is outside `1..=256` (doc ids are stored as `u8`), if
    /// `avg_postings_per_term` is zero, or if `query_length > num_terms`.
    fn new(
        query_length: usize,
        num_terms: usize,
        avg_postings_per_term: usize,
        block_size: usize,
        name: &str,
    ) -> Self {
        // Term ids are stored as `u16` and doc ids as `u8`. Reject sizes that
        // would make the narrowing casts below wrap around silently.
        assert!(
            num_terms <= usize::from(u16::MAX) + 1,
            "num_terms ({}) does not fit in u16 term ids",
            num_terms
        );
        assert!(
            block_size >= 1 && block_size <= usize::from(u8::MAX) + 1,
            "block_size ({}) must be in 1..=256 so doc ids fit in u8",
            block_size
        );
        assert!(
            avg_postings_per_term >= 1,
            "avg_postings_per_term must be at least 1"
        );
        // A shorter-than-requested query would make the reported throughput
        // (which is based on `query_length`) misleading.
        assert!(
            query_length <= num_terms,
            "query_length ({}) exceeds num_terms ({})",
            query_length,
            num_terms
        );

        // Fixed seed for reproducibility.
        let mut rng = StdRng::seed_from_u64(42);

        // Pick `query_length` distinct terms by shuffling all term ids and
        // keeping a prefix. The cast is lossless thanks to the assert above.
        let mut query_terms: Vec<u16> = (0..num_terms).map(|i| i as u16).collect();
        query_terms.shuffle(&mut rng);
        query_terms.truncate(query_length);

        // Attach a random non-zero weight to every query term.
        let query: Vec<(u16, u8)> = query_terms
            .into_iter()
            .map(|term_id| (term_id, rng.gen_range(1..=255)))
            .collect();

        // Terms are emitted in ascending id order, so the result is already
        // sorted by term id and needs no extra sort.
        let mut document: Vec<(u16, (Vec<u8>, Vec<u8>))> = Vec::with_capacity(num_terms);
        for term_id in 0..num_terms {
            let num_postings = rng.gen_range(1..=avg_postings_per_term * 2);
            let mut doc_ids: Vec<u8> = Vec::with_capacity(num_postings);
            let mut scores: Vec<u8> = Vec::with_capacity(num_postings);

            // NOTE(review): doc ids are drawn independently, so a posting list
            // may contain repeated or unsorted ids - confirm the benchmarked
            // functions are expected to handle that.
            for _ in 0..num_postings {
                let doc_id = rng.gen_range(0..block_size);
                let score = rng.gen_range(1..=255);
                // Lossless: `doc_id < block_size <= 256`.
                doc_ids.push(doc_id as u8);
                scores.push(score);
            }

            // Lossless: `term_id < num_terms <= 65536`.
            document.push((term_id as u16, (doc_ids, scores)));
        }

        TestData {
            query,
            document,
            block_size,
            name: name.to_string(),
        }
    }

    /// Data set labelled "sparse": 500 terms, average of 2 postings per term.
    fn sparse(query_length: usize, block_size: usize) -> Self {
        Self::new(query_length, 500, 2, block_size, "sparse")
    }

    /// Data set labelled "dense": 50 terms, average of 20 postings per term.
    fn dense(query_length: usize, block_size: usize) -> Self {
        Self::new(query_length, 50, 20, block_size, "dense")
    }

    /// Data set labelled "balanced": 500 terms, average of 4 postings per term.
    fn balanced(query_length: usize, block_size: usize) -> Self {
        Self::new(query_length, 500, 4, block_size, "balanced")
    }

    /// Returns an owned copy of this data set's label.
    fn name(&self) -> String {
        self.name.clone()
    }
}


/// Benchmarks `block_score` and `block_score_scalar` over several query
/// lengths and three synthetic data sets (balanced, sparse, dense).
///
/// Each benchmark is identified as `<data set>/<query length>` (with a
/// `scalar_` prefix for the scalar variant), and throughput is reported in
/// query terms per iteration.
fn benchmark_block_score_varying_query_lengths(c: &mut Criterion) {
    let mut group = c.benchmark_group("block_score_query_lengths");

    let query_lengths = [5, 10, 25, 50];
    let block_size = 16;

    for &query_length in &query_lengths {
        let datasets = [
            TestData::balanced(query_length, block_size),
            TestData::sparse(query_length, block_size),
            TestData::dense(query_length, block_size),
        ];

        for test_data in datasets {
            group.throughput(Throughput::Elements(query_length as u64));

            // The varying quantity in this group is the query length, so it
            // must be the ID parameter. Using the constant block size would
            // give every query length the same benchmark ID.
            group.bench_with_input(
                BenchmarkId::new(test_data.name(), query_length),
                &test_data,
                |b, data| {
                    b.iter(|| {
                        block_score(
                            black_box(&data.query),
                            black_box(&data.document),
                            black_box(data.block_size),
                        )
                    })
                },
            );

            group.bench_with_input(
                BenchmarkId::new(format!("scalar_{}", test_data.name()), query_length),
                &test_data,
                |b, data| {
                    b.iter(|| {
                        block_score_scalar(
                            black_box(&data.query),
                            black_box(&data.document),
                            black_box(data.block_size),
                        )
                    })
                },
            );
        }
    }
    group.finish();
}

// Register the benchmark functions under the group name `benches`.
criterion_group!(
benches,
benchmark_block_score_varying_query_lengths
);

// Generate the benchmark binary's entry point for the `benches` group
// (the Cargo manifest sets `harness = false` for this bench target).
criterion_main!(benches);
6 changes: 6 additions & 0 deletions bin/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ struct Args {
}
fn main() -> Result<()> {
let args = Args::from_args();
#[cfg(target_arch = "x86_64")]
{
if is_x86_feature_detected!("avx512bw") {
eprintln!("avx supported");
}
}

// 1. Load the index
eprintln!("Loading the index");
Expand Down
1 change: 1 addition & 0 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use std::fs::{read_to_string, File};
use std::io::{BufWriter, Write};
use std::path::Path;


fn main() {
let out_dir_env = env::var_os("OUT_DIR").unwrap();
let out_dir = Path::new(&out_dir_env);
Expand Down
2 changes: 1 addition & 1 deletion src/ciff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ fn convert_to_bmp(input: &Path, output: &Path, bsize: usize, compress_range: boo
for (_, block) in b_forward_index.data.iter().enumerate() {
tot += block.len();
tot_avg_docs +=
block.iter().map(|(_, v)| v.len()).sum::<usize>() as f32 / block.len() as f32;
block.iter().map(|(_, v)| v.0.len()).sum::<usize>() as f32 / block.len() as f32;
}
eprintln!("avg terms per block: {}", tot / b_forward_index.data.len());
eprintln!(
Expand Down
Loading