From e1f38924ea62a9e50df3e07a12803522575ff5c0 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Wed, 23 Apr 2025 09:18:25 +0300 Subject: [PATCH 1/2] GH-725: Added ExtensionReader --- .../templates/AbstractFieldReader.java | 17 +++++++ .../main/codegen/templates/BaseReader.java | 2 +- .../main/codegen/templates/NullReader.java | 4 ++ .../complex/reader/ExtensionReader.java | 44 ++++++++++++++++ .../org/apache/arrow/vector/UuidVector.java | 13 +++++ .../vector/complex/impl/UuidReaderImpl.java | 51 +++++++++++++++++++ .../complex/writer/TestComplexWriter.java | 36 +++++++++++++ 7 files changed, 166 insertions(+), 1 deletion(-) create mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/reader/ExtensionReader.java create mode 100644 vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java diff --git a/vector/src/main/codegen/templates/AbstractFieldReader.java b/vector/src/main/codegen/templates/AbstractFieldReader.java index 25b071fab..7cb12b755 100644 --- a/vector/src/main/codegen/templates/AbstractFieldReader.java +++ b/vector/src/main/codegen/templates/AbstractFieldReader.java @@ -108,6 +108,23 @@ public void copyAsField(String name, ${name}Writer writer) { } + + public void read(ExtensionHolder holder) { + fail("Extension"); + } + + public void read(int arrayIndex, ExtensionHolder holder) { + fail("RepeatedExtension"); + } + + public void copyAsValue(AbstractExtensionTypeWriter writer) { + fail("CopyAsValueExtension"); + } + + public void copyAsField(String name, AbstractExtensionTypeWriter writer) { + fail("CopyAsFieldExtension"); + } + public FieldReader reader(String name) { fail("reader(String name)"); return null; diff --git a/vector/src/main/codegen/templates/BaseReader.java b/vector/src/main/codegen/templates/BaseReader.java index e75e8a297..c52345af2 100644 --- a/vector/src/main/codegen/templates/BaseReader.java +++ b/vector/src/main/codegen/templates/BaseReader.java @@ -73,7 +73,7 @@ public interface RepeatedMapReader 
extends MapReader{ public interface ScalarReader extends <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Reader, - BaseReader {} + ExtensionReader, BaseReader {} interface ComplexReader{ StructReader rootAsStruct(); diff --git a/vector/src/main/codegen/templates/NullReader.java b/vector/src/main/codegen/templates/NullReader.java index 1d77248e9..88e6ea98e 100644 --- a/vector/src/main/codegen/templates/NullReader.java +++ b/vector/src/main/codegen/templates/NullReader.java @@ -86,6 +86,10 @@ public void read(int arrayIndex, Nullable${name}Holder holder){ } + public void read(ExtensionHolder holder) { + holder.isSet = 0; + } + public int size(){ return 0; } diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/reader/ExtensionReader.java b/vector/src/main/java/org/apache/arrow/vector/complex/reader/ExtensionReader.java new file mode 100644 index 000000000..1ba7b2715 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/reader/ExtensionReader.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.arrow.vector.complex.reader;
+
+import org.apache.arrow.vector.holders.ExtensionHolder;
+
+/** Interface for reading extension types. Extends the functionality of {@link BaseReader}. */
+public interface ExtensionReader extends BaseReader {
+
+  /**
+   * Reads the current value into the given extension holder.
+   *
+   * @param holder the {@link ExtensionHolder} to populate
+   */
+  void read(ExtensionHolder holder);
+
+  /**
+   * Reads and returns an object representation of the extension type.
+   *
+   * @return the object representation of the extension type
+   */
+  Object readObject();
+
+  /**
+   * Checks if the current value is set.
+   *
+   * @return true if the value is set, false otherwise
+   */
+  boolean isSet();
+}
diff --git a/vector/src/test/java/org/apache/arrow/vector/UuidVector.java b/vector/src/test/java/org/apache/arrow/vector/UuidVector.java
index 5c90d45f6..72ba4aa55 100644
--- a/vector/src/test/java/org/apache/arrow/vector/UuidVector.java
+++ b/vector/src/test/java/org/apache/arrow/vector/UuidVector.java
@@ -20,6 +20,9 @@
 import java.util.UUID;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.vector.complex.impl.UuidReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holder.UuidHolder;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.types.pojo.UuidType;
@@ -79,11 +82,34 @@ public TransferPair makeTransferPair(ValueVector to) {
     return new TransferImpl((UuidVector) to);
   }
 
+  @Override
+  protected FieldReader getReaderImpl() {
+    return new UuidReaderImpl(this);
+  }
+
   public void setSafe(int index, byte[] value) {
     getUnderlyingVector().setIndexDefined(index);
     getUnderlyingVector().setSafe(index, value);
   }
 
+  /**
+   * Copies the element at {@code index} into {@code holder}.
+   *
+   * <p>For a null slot only {@code holder.isSet} is written (set to 0); {@code holder.value} is
+   * left untouched.
+   *
+   * @param index position of the element to read
+   * @param holder holder that receives the value bytes and the set flag
+   */
+  public void get(int index, UuidHolder holder) {
+    // A null slot must not be reported as set, so check validity before touching the data.
+    if (isNull(index)) {
+      holder.isSet = 0;
+      return;
+    }
+    holder.value = getUnderlyingVector().get(index);
+    holder.isSet = 1;
+  }
+
   public class
TransferImpl implements TransferPair {
     UuidVector to;
     ValueVector targetUnderlyingVector;
diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java
new file mode 100644
index 000000000..81dd51dcd
--- /dev/null
+++ b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.complex.impl;
+
+import org.apache.arrow.vector.UuidVector;
+import org.apache.arrow.vector.holder.UuidHolder;
+import org.apache.arrow.vector.holders.ExtensionHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/** Reads values of a {@link UuidVector} at the current position into a {@link UuidHolder}. */
+public class UuidReaderImpl extends AbstractFieldReader {
+
+  private final UuidVector vector;
+
+  /**
+   * Creates a reader over the given vector.
+   *
+   * @param vector the vector to read from
+   */
+  public UuidReaderImpl(UuidVector vector) {
+    super();
+    this.vector = vector;
+  }
+
+  @Override
+  public MinorType getMinorType() {
+    return vector.getMinorType();
+  }
+
+  @Override
+  public Field getField() {
+    return vector.getField();
+  }
+
+  @Override
+  public boolean isSet() {
+    return !vector.isNull(idx());
+  }
+
+  /**
+   * Reads the value at the current position into {@code holder}.
+   *
+   * <p>When the slot is null only {@code holder.isSet} is written (set to 0).
+   *
+   * @param holder destination holder; must be a {@link UuidHolder}
+   * @throws ClassCastException if {@code holder} is not a {@link UuidHolder}
+   */
+  @Override
+  public void read(ExtensionHolder holder) {
+    UuidHolder uuidHolder = (UuidHolder) holder;
+    if (vector.isNull(idx())) {
+      uuidHolder.isSet = 0;
+      return;
+    }
+    vector.get(idx(), uuidHolder);
+  }
+}
diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
index 2745386db..16d686844 100644
--- a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
+++ b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
@@ -31,6 +31,7 @@
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
+import java.util.UUID;
 import org.apache.arrow.memory.ArrowBuf;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
@@ -64,6 +65,7 @@
 import org.apache.arrow.vector.complex.impl.UnionMapReader;
 import org.apache.arrow.vector.complex.impl.UnionReader;
 import org.apache.arrow.vector.complex.impl.UnionWriter;
+import org.apache.arrow.vector.complex.impl.UuidWriterFactory;
 import org.apache.arrow.vector.complex.reader.BaseReader.StructReader;
 import org.apache.arrow.vector.complex.reader.BigIntReader;
 import org.apache.arrow.vector.complex.reader.FieldReader;
@@ -71,9 +73,11 @@
 import
org.apache.arrow.vector.complex.reader.Float8Reader; import org.apache.arrow.vector.complex.reader.IntReader; import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter; +import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; +import org.apache.arrow.vector.holder.UuidHolder; import org.apache.arrow.vector.holders.DecimalHolder; import org.apache.arrow.vector.holders.DurationHolder; import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; @@ -93,6 +97,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.CallBack; import org.apache.arrow.vector.util.DecimalUtility; import org.apache.arrow.vector.util.JsonStringArrayList; @@ -2489,4 +2494,35 @@ public void unionWithVarCharAndBinaryHelpers() throws Exception { "row12", new String(vector.getLargeVarBinaryVector().get(11), StandardCharsets.UTF_8)); } } + + @Test + public void extensionWriterReader() throws Exception { + // test values + UUID u1 = UUID.randomUUID(); + + try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { + // write + + ComplexWriter writer = new ComplexWriterImpl("root", parent); + StructWriter rootWriter = writer.rootAsStruct(); + + { + ExtensionWriter extensionWriter = rootWriter.extension("uuid1", new UuidType()); + extensionWriter.setPosition(0); + extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter.writeExtension(u1); + } + // read + StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); + { + FieldReader uuidReader = rootReader.reader("uuid1"); + 
uuidReader.setPosition(0); + UuidHolder uuidHolder = new UuidHolder(); + uuidReader.read(uuidHolder); + final ByteBuffer bb = ByteBuffer.wrap(uuidHolder.value); + UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); + assertEquals(u1, actualUuid); + } + } + } } From 928519b152ecbe0f6ded51a1dd55f3168de013f4 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Fri, 25 Apr 2025 11:31:47 +0300 Subject: [PATCH 2/2] fixed code style AbstractFieldReader.java --- vector/src/main/codegen/templates/AbstractFieldReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vector/src/main/codegen/templates/AbstractFieldReader.java b/vector/src/main/codegen/templates/AbstractFieldReader.java index 7cb12b755..7e84323b6 100644 --- a/vector/src/main/codegen/templates/AbstractFieldReader.java +++ b/vector/src/main/codegen/templates/AbstractFieldReader.java @@ -124,7 +124,7 @@ public void copyAsValue(AbstractExtensionTypeWriter writer) { public void copyAsField(String name, AbstractExtensionTypeWriter writer) { fail("CopyAsFieldExtension"); } - + public FieldReader reader(String name) { fail("reader(String name)"); return null;