Skip to content

Commit 5027767

Browse files
committed
Add unit test to ensure consistent metadata encoding
1 parent 7019c1e commit 5027767

File tree

1 file changed

+45
-0
lines changed

1 file changed

+45
-0
lines changed

arrow-ipc/src/writer.rs

+45
Original file line numberDiff line numberDiff line change
@@ -1854,6 +1854,7 @@ fn pad_to_alignment(alignment: u8, len: usize) -> usize {
18541854

18551855
#[cfg(test)]
18561856
mod tests {
1857+
use std::hash::Hasher;
18571858
use std::io::Cursor;
18581859
use std::io::Seek;
18591860

@@ -3306,4 +3307,48 @@ mod tests {
33063307

33073308
Ok(())
33083309
}
3310+
3311+
/// Checks that writing the same schema and field metadata through
/// `StreamWriter` yields byte-identical output on every run, i.e. that the
/// encoded stream does not depend on `HashMap` iteration order.
#[test]
fn test_metadata_encoding_ordering() {
    /// Builds a fresh metadata map, writes an empty batch carrying it to an
    /// in-memory IPC stream, and returns a hash of the produced bytes.
    ///
    /// The map is rebuilt on every call on purpose, so each invocation gets
    /// its own `HashMap` instance.
    fn create_hash() -> u64 {
        let metadata: HashMap<String, String> = [
            ("a", "1"), //
            ("b", "2"), //
            ("c", "3"), //
            ("d", "4"), //
            ("e", "5"), //
        ]
        .into_iter()
        .map(|(k, v)| (k.to_owned(), v.to_owned()))
        .collect();

        // Set metadata on both the schema and a field within it.
        let schema = Arc::new(
            Schema::new(vec![
                Field::new("a", DataType::Int64, true).with_metadata(metadata.clone())
            ])
            .with_metadata(metadata),
        );
        let batch = RecordBatch::new_empty(schema);

        let mut bytes = Vec::new();
        let mut w = StreamWriter::try_new(&mut bytes, batch.schema_ref()).unwrap();
        w.write(&batch).unwrap();
        w.finish().unwrap();

        let mut h = std::hash::DefaultHasher::new();
        h.write(&bytes);
        h.finish()
    }

    let expected = create_hash();

    // Since there is randomness in the HashMap and we cannot specify our
    // own Hasher for the implementation used for metadata, run the above
    // code 20x and verify it does not change. This is not perfect but it
    // should be good enough.
    //
    // Iterate over the range itself: `[0..20]` would be a one-element array
    // holding a single `Range`, so the check would only run once.
    for run in 0..20 {
        assert_eq!(
            create_hash(),
            expected,
            "encoded IPC stream changed on run {run}"
        );
    }
}
33093354
}

0 commit comments

Comments
 (0)