Skip to content

GH-725: Added ExtensionReader #726

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions vector/src/main/codegen/templates/AbstractFieldReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,23 @@ public void copyAsField(String name, ${name}Writer writer) {
}

</#list></#list>

public void read(ExtensionHolder holder) {
fail("Extension");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be clearer to use the signature as the error message (like other methods do below)?

}

public void read(int arrayIndex, ExtensionHolder holder) {
fail("RepeatedExtension");
}

public void copyAsValue(AbstractExtensionTypeWriter writer) {
fail("CopyAsValueExtension");
}

public void copyAsField(String name, AbstractExtensionTypeWriter writer) {
fail("CopyAsFieldExtension");
}

public FieldReader reader(String name) {
fail("reader(String name)");
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ public MapWriter map(boolean keysSorted) {

@Override
public ExtensionWriter extension(ArrowType arrowType) {
return getWriter(MinorType.EXTENSIONTYPE).extension(arrowType);
return getWriter(MinorType.LIST).extension(arrowType);
}

@Override
Expand Down Expand Up @@ -325,7 +325,7 @@ public MapWriter map(String name, boolean keysSorted) {

@Override
public ExtensionWriter extension(String name, ArrowType arrowType) {
return getWriter(MinorType.EXTENSIONTYPE).extension(name, arrowType);
return getWriter(MinorType.STRUCT).extension(name, arrowType);
}

<#list vv.types as type><#list type.minor as minor>
Expand Down
2 changes: 1 addition & 1 deletion vector/src/main/codegen/templates/BaseReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ public interface RepeatedMapReader extends MapReader{

public interface ScalarReader extends
<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Reader, </#list></#list>
BaseReader {}
ExtensionReader, BaseReader {}

interface ComplexReader{
StructReader rootAsStruct();
Expand Down
4 changes: 4 additions & 0 deletions vector/src/main/codegen/templates/NullReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ public void read(int arrayIndex, Nullable${name}Holder holder){
}
</#list></#list>

public void read(ExtensionHolder holder) {
holder.isSet = 0;
}

public int size(){
return 0;
}
Expand Down
4 changes: 4 additions & 0 deletions vector/src/main/codegen/templates/PromotableWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,10 @@ public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory) {
getWriter(MinorType.EXTENSIONTYPE).addExtensionTypeWriterFactory(factory);
}

public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory, ArrowType arrowType) {
getWriter(MinorType.EXTENSIONTYPE, arrowType).addExtensionTypeWriterFactory(factory);
}

@Override
public void allocate() {
getWriter().allocate();
Expand Down
11 changes: 8 additions & 3 deletions vector/src/main/codegen/templates/UnionListWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ public class Union${listName}Writer extends AbstractFieldWriter {
private boolean inStruct = false;
private boolean listStarted = false;
private String structName;
private ArrowType extensionType;
<#if listName == "LargeList" || listName == "LargeListView">
private static final long OFFSET_WIDTH = 8;
<#else>
Expand Down Expand Up @@ -203,8 +204,8 @@ public MapWriter map(String name, boolean keysSorted) {

@Override
public ExtensionWriter extension(ArrowType arrowType) {
writer.extension(arrowType);
return writer;
this.extensionType = arrowType;
return this;
}
@Override
public ExtensionWriter extension(String name, ArrowType arrowType) {
Expand Down Expand Up @@ -337,13 +338,17 @@ public void writeNull() {
@Override
public void writeExtension(Object value) {
writer.writeExtension(value);
writer.setPosition(writer.idx() + 1);
}

@Override
public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) {
writer.addExtensionTypeWriterFactory(var1);
writer.addExtensionTypeWriterFactory(var1, extensionType);
}

public void write(ExtensionHolder var1) {
writer.write(var1);
writer.setPosition(writer.idx() + 1);
}

<#list vv.types as type>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.arrow.vector.complex.reader;

import org.apache.arrow.vector.holders.ExtensionHolder;

/** Interface for reading extension types. Extends the functionality of {@link BaseReader}. */
public interface ExtensionReader extends BaseReader {

/**
* Reads to the given extension holder.
*
* @param holder the {@link ExtensionHolder} to read
*/
void read(ExtensionHolder holder);

/**
* Reads and returns an object representation of the extension type.
*
* @return the object representation of the extension type
*/
Object readObject();

/**
* Checks if the current value is set.
*
* @return true if the value is set, false otherwise
*/
boolean isSet();
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,22 @@
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.UUID;
import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.util.AutoCloseables;
import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.impl.UnionListReader;
import org.apache.arrow.vector.complex.impl.UnionListWriter;
import org.apache.arrow.vector.complex.impl.UuidWriterFactory;
import org.apache.arrow.vector.complex.reader.FieldReader;
import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter;
import org.apache.arrow.vector.holder.UuidHolder;
import org.apache.arrow.vector.holders.DurationHolder;
import org.apache.arrow.vector.holders.FixedSizeBinaryHolder;
import org.apache.arrow.vector.holders.TimeStampMilliTZHolder;
Expand All @@ -41,6 +47,7 @@
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.UuidType;
import org.apache.arrow.vector.util.TransferPair;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
Expand Down Expand Up @@ -1198,6 +1205,71 @@ public void testGetTransferPairWithField() {
}
}

@Test
public void testListVectorWithExtensionType() throws Exception {
final FieldType type = FieldType.nullable(new UuidType());
try (final ListVector inVector = new ListVector("list", allocator, type, null)) {
UnionListWriter writer = inVector.getWriter();
writer.allocate();
writer.setPosition(0);
UUID u1 = UUID.randomUUID();
UUID u2 = UUID.randomUUID();
writer.startList();
ExtensionWriter extensionWriter = writer.extension(new UuidType());
extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory());
extensionWriter.writeExtension(u1);
extensionWriter.writeExtension(u2);
writer.endList();

writer.setValueCount(1);

FieldReader reader = inVector.getReader();
assertTrue(reader.isSet(), "shouldn't be null");
Object result = inVector.getObject(0);
ArrayList<UUID> resultSet = (ArrayList<UUID>) result;
assertEquals(2, resultSet.size());
assertEquals(u1, resultSet.get(0));
assertEquals(u2, resultSet.get(1));
}
}

@Test
public void testListVectorReaderForExtensionType() throws Exception {
final FieldType type = FieldType.nullable(new UuidType());
try (final ListVector inVector = new ListVector("list", allocator, type, null)) {
UnionListWriter writer = inVector.getWriter();
writer.allocate();
writer.setPosition(0);
UUID u1 = UUID.randomUUID();
UUID u2 = UUID.randomUUID();
writer.startList();
ExtensionWriter extensionWriter = writer.extension(new UuidType());
extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory());
extensionWriter.writeExtension(u1);
extensionWriter.writeExtension(u2);
writer.endList();

writer.setValueCount(1);

UnionListReader reader = inVector.getReader();
assertTrue(reader.isSet(), "shouldn't be null");
reader.setPosition(0);
reader.next();
FieldReader uuidReader = reader.reader();
UuidHolder holder = new UuidHolder();
uuidReader.read(holder);
ByteBuffer bb = ByteBuffer.wrap(holder.value);
UUID actualUuid = new UUID(bb.getLong(), bb.getLong());
assertEquals(u1, actualUuid);
reader.next();
uuidReader = reader.reader();
uuidReader.read(holder);
bb = ByteBuffer.wrap(holder.value);
actualUuid = new UUID(bb.getLong(), bb.getLong());
assertEquals(u2, actualUuid);
}
}

private void writeIntValues(UnionListWriter writer, int[] values) {
writer.startList();
for (int v : values) {
Expand Down
13 changes: 13 additions & 0 deletions vector/src/test/java/org/apache/arrow/vector/UuidVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
import java.util.UUID;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.util.hash.ArrowBufHasher;
import org.apache.arrow.vector.complex.impl.UuidReaderImpl;
import org.apache.arrow.vector.complex.reader.FieldReader;
import org.apache.arrow.vector.holder.UuidHolder;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.UuidType;
Expand Down Expand Up @@ -79,11 +82,21 @@ public TransferPair makeTransferPair(ValueVector to) {
return new TransferImpl((UuidVector) to);
}

@Override
protected FieldReader getReaderImpl() {
return new UuidReaderImpl(this);
}

public void setSafe(int index, byte[] value) {
getUnderlyingVector().setIndexDefined(index);
getUnderlyingVector().setSafe(index, value);
}

public void get(int index, UuidHolder holder) {
holder.value = getUnderlyingVector().get(index);
holder.isSet = 1;
}

public class TransferImpl implements TransferPair {
UuidVector to;
ValueVector targetUnderlyingVector;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -805,4 +805,29 @@ public void testExtensionType() throws Exception {
assertEquals(u2, uuidVector.getObject(1));
}
}

@Test
public void testExtensionTypeForList() throws Exception {
try (final ListVector container = ListVector.empty(EMPTY_SCHEMA_PATH, allocator);
final UuidVector v =
(UuidVector) container.addOrGetVector(FieldType.nullable(new UuidType())).getVector();
final PromotableWriter writer = new PromotableWriter(v, container)) {
UUID u1 = UUID.randomUUID();
UUID u2 = UUID.randomUUID();
container.allocateNew();
container.setValueCount(1);
writer.addExtensionTypeWriterFactory(new UuidWriterFactory());

writer.setPosition(0);
writer.writeExtension(u1);
writer.setPosition(1);
writer.writeExtension(u2);

container.setValueCount(2);

UuidVector uuidVector = (UuidVector) container.getDataVector();
assertEquals(u1, uuidVector.getObject(0));
assertEquals(u2, uuidVector.getObject(1));
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.arrow.vector.complex.impl;

import org.apache.arrow.vector.UuidVector;
import org.apache.arrow.vector.holder.UuidHolder;
import org.apache.arrow.vector.holders.ExtensionHolder;
import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.types.pojo.Field;

public class UuidReaderImpl extends AbstractFieldReader {

private final UuidVector vector;

public UuidReaderImpl(UuidVector vector) {
super();
this.vector = vector;
}

@Override
public MinorType getMinorType() {
return vector.getMinorType();
}

@Override
public Field getField() {
return vector.getField();
}

@Override
public boolean isSet() {
return !vector.isNull(idx());
}

@Override
public void read(ExtensionHolder holder) {
vector.get(idx(), (UuidHolder) holder);
}

@Override
public void read(int arrayIndex, ExtensionHolder holder) {
vector.get(arrayIndex, (UuidHolder) holder);
}

@Override
public void copyAsValue(AbstractExtensionTypeWriter writer) {
UuidWriterImpl impl = (UuidWriterImpl) writer;
impl.vector.copyFromSafe(idx(), impl.idx(), vector);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't we need to override the other methods in AbstractFieldReader?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added annotation

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

read(int arrayIndex, ExtensionHolder holder), copyAsValue, copyAsField still aren't implemented?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added read, copyAsValue. copyAsField can't find any usage for other classes where this impl exists.

}
Loading