Skip to content

Commit 686d085

Browse files
authored
[Variant] ReadOnlyMetadataBuilder borrows its underlying VariantMetadata (#8502)
# Which issue does this PR close? - Relates to #8336 # Rationale for this change While pathfinding variant unshredding code, I noticed that there's a lot of variant metadata cloning going on. It's a 32-byte object, so cloning is not free. And it turns out that ~all read-only metadata builder usage in the code base can work just as easily with a borrowed reference because ultimately we're working with bytes borrowed from a binary view that holds the metadata column. # What changes are included in this PR? Update the `ReadOnlyMetadataBuilder` to borrow the `VariantMetadata` it uses, and update call sites accordingly. # Are these changes tested? Yes, the change affects several unit tests. # Are there any user-facing changes? Signature of `ReadOnlyMetadataBuilder::new` has changed.
1 parent 883380b commit 686d085

File tree

3 files changed

+16
-15
lines changed

3 files changed

+16
-15
lines changed

parquet-variant-compute/src/shred_variant.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> {
267267
};
268268

269269
// Route the object's fields by name as either shredded or unshredded
270-
let mut builder = self.value_builder.builder_ext(value.metadata().clone());
270+
let mut builder = self.value_builder.builder_ext(value.metadata());
271271
let mut object_builder = builder.try_new_object()?;
272272
let mut seen = std::collections::HashSet::new();
273273
let mut partially_shredded = false;
@@ -794,7 +794,7 @@ mod tests {
794794
let object_with_foo_field = |i| {
795795
use parquet_variant::{ParentState, ValueBuilder, VariantMetadata};
796796
let metadata = VariantMetadata::new(metadata.value(i));
797-
let mut metadata_builder = ReadOnlyMetadataBuilder::new(metadata.clone());
797+
let mut metadata_builder = ReadOnlyMetadataBuilder::new(&metadata);
798798
let mut value_builder = ValueBuilder::new();
799799
let state = ParentState::variant(&mut value_builder, &mut metadata_builder);
800800
ObjectBuilder::new(state, false)

parquet-variant-compute/src/variant_array_builder.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,8 @@ impl VariantValueArrayBuilder {
295295
/// builder.append_value(Variant::from(42));
296296
/// ```
297297
pub fn append_value(&mut self, value: Variant<'_, '_>) {
298-
self.builder_ext(value.metadata().clone())
298+
// NOTE: Have to clone because the builder consumes `value`
299+
self.builder_ext(&value.metadata().clone())
299300
.append_value(value);
300301
}
301302

@@ -313,7 +314,7 @@ impl VariantValueArrayBuilder {
313314
/// let Variant::Object(obj) = value else {
314315
/// panic!("Not a variant object");
315316
/// };
316-
/// let mut metadata_builder = ReadOnlyMetadataBuilder::new(obj.metadata.clone());
317+
/// let mut metadata_builder = ReadOnlyMetadataBuilder::new(&obj.metadata);
317318
/// let state = value_array_builder.parent_state(&mut metadata_builder);
318319
/// let mut object_builder = ObjectBuilder::new(state, false);
319320
/// for (field_name, field_value) in obj.iter() {
@@ -339,7 +340,7 @@ impl VariantValueArrayBuilder {
339340
/// parameter (similar to the way [`parquet_variant::ObjectFieldBuilder`] hides field names).
340341
pub fn builder_ext<'a>(
341342
&'a mut self,
342-
metadata: VariantMetadata<'a>,
343+
metadata: &'a VariantMetadata<'a>,
343344
) -> VariantValueArrayBuilderExt<'a> {
344345
VariantValueArrayBuilderExt {
345346
metadata_builder: ReadOnlyMetadataBuilder::new(metadata),
@@ -548,7 +549,7 @@ mod test {
548549

549550
// filtering fields takes more work because we need to manually create an object builder
550551
let value = array.value(1);
551-
let mut builder = value_builder.builder_ext(value.metadata().clone());
552+
let mut builder = value_builder.builder_ext(value.metadata());
552553
builder
553554
.new_object()
554555
.with_field("name", value.get_object_field("name").unwrap())
@@ -557,7 +558,7 @@ mod test {
557558

558559
// same bytes, but now nested and duplicated inside a list
559560
let value = array.value(2);
560-
let mut builder = value_builder.builder_ext(value.metadata().clone());
561+
let mut builder = value_builder.builder_ext(value.metadata());
561562
builder
562563
.new_list()
563564
.with_value(value.clone())

parquet-variant/src/builder.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -498,15 +498,15 @@ impl MetadataBuilder for WritableMetadataBuilder {
498498
/// time `finish` is called, a different trait impl will be needed.
499499
#[derive(Debug)]
500500
pub struct ReadOnlyMetadataBuilder<'m> {
501-
metadata: VariantMetadata<'m>,
501+
metadata: &'m VariantMetadata<'m>,
502502
// A cache that tracks field names this builder has already seen, because finding the field id
503503
// for a given field name is expensive -- O(n) for a large and unsorted metadata dictionary.
504504
known_field_names: HashMap<&'m str, u32>,
505505
}
506506

507507
impl<'m> ReadOnlyMetadataBuilder<'m> {
508508
/// Creates a new read-only metadata builder from the given metadata dictionary.
509-
pub fn new(metadata: VariantMetadata<'m>) -> Self {
509+
pub fn new(metadata: &'m VariantMetadata<'m>) -> Self {
510510
Self {
511511
metadata,
512512
known_field_names: HashMap::new(),
@@ -3357,7 +3357,7 @@ mod tests {
33573357

33583358
// Use the metadata to build new variant values
33593359
let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
3360-
let mut metadata_builder = ReadOnlyMetadataBuilder::new(metadata);
3360+
let mut metadata_builder = ReadOnlyMetadataBuilder::new(&metadata);
33613361
let mut value_builder = ValueBuilder::new();
33623362

33633363
{
@@ -3390,7 +3390,7 @@ mod tests {
33903390

33913391
// Use the metadata to build new variant values
33923392
let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
3393-
let mut metadata_builder = ReadOnlyMetadataBuilder::new(metadata);
3393+
let mut metadata_builder = ReadOnlyMetadataBuilder::new(&metadata);
33943394
let mut value_builder = ValueBuilder::new();
33953395

33963396
{
@@ -3438,7 +3438,7 @@ mod tests {
34383438

34393439
// Copy using the new bytes API
34403440
let metadata = VariantMetadata::new(&metadata);
3441-
let mut metadata = ReadOnlyMetadataBuilder::new(metadata);
3441+
let mut metadata = ReadOnlyMetadataBuilder::new(&metadata);
34423442
let mut builder2 = ValueBuilder::new();
34433443
let state = ParentState::variant(&mut builder2, &mut metadata);
34443444
ValueBuilder::append_variant_bytes(state, variant1);
@@ -3466,7 +3466,7 @@ mod tests {
34663466

34673467
// Create a new object copying subset of fields interleaved with new ones
34683468
let metadata2 = VariantMetadata::new(&metadata1);
3469-
let mut metadata2 = ReadOnlyMetadataBuilder::new(metadata2);
3469+
let mut metadata2 = ReadOnlyMetadataBuilder::new(&metadata2);
34703470
let mut builder2 = ValueBuilder::new();
34713471
let state = ParentState::variant(&mut builder2, &mut metadata2);
34723472
{
@@ -3530,7 +3530,7 @@ mod tests {
35303530

35313531
// Create a new list copying subset of elements interleaved with new ones
35323532
let metadata2 = VariantMetadata::new(&metadata1);
3533-
let mut metadata2 = ReadOnlyMetadataBuilder::new(metadata2);
3533+
let mut metadata2 = ReadOnlyMetadataBuilder::new(&metadata2);
35343534
let mut builder2 = ValueBuilder::new();
35353535
let state = ParentState::variant(&mut builder2, &mut metadata2);
35363536
{
@@ -3626,7 +3626,7 @@ mod tests {
36263626

36273627
// Create filtered/modified version: only copy active users and inject new data
36283628
let metadata2 = VariantMetadata::new(&metadata1);
3629-
let mut metadata2 = ReadOnlyMetadataBuilder::new(metadata2);
3629+
let mut metadata2 = ReadOnlyMetadataBuilder::new(&metadata2);
36303630
let mut builder2 = ValueBuilder::new();
36313631
let state = ParentState::variant(&mut builder2, &mut metadata2);
36323632
{

0 commit comments

Comments
 (0)