Skip to content

Commit c4d3c9e

Browse files
authored
GH-109: Implement Vector Validators for StringView (#886)
## What's Changed

Implement Vector Validators for StringView.

Closes #109.
1 parent a4f3f3e commit c4d3c9e

File tree

5 files changed

+54
-13
lines changed

5 files changed

+54
-13
lines changed

vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,22 @@ private void validateVectorCommon(ValueVector vector) {
5252

5353
if (vector instanceof FieldVector) {
5454
FieldVector fieldVector = (FieldVector) vector;
55-
// TODO: https://github.com/apache/arrow/issues/41734
5655
int typeBufferCount = TypeLayout.getTypeBufferCount(arrowType);
57-
validateOrThrow(
58-
fieldVector.getFieldBuffers().size() == typeBufferCount,
59-
"Expected %s buffers in vector of type %s, got %s.",
60-
typeBufferCount,
61-
vector.getField().getType().toString(),
62-
fieldVector.getFieldBuffers().size());
56+
if (TypeLayout.getTypeLayout(arrowType).isFixedBufferCount()) {
57+
validateOrThrow(
58+
fieldVector.getFieldBuffers().size() == typeBufferCount,
59+
"Expected %s buffers in vector of type %s, got %s.",
60+
typeBufferCount,
61+
vector.getField().getType().toString(),
62+
fieldVector.getFieldBuffers().size());
63+
} else {
64+
validateOrThrow(
65+
fieldVector.getFieldBuffers().size() >= typeBufferCount,
66+
"Expected at least %s buffers in vector of type %s, got %s.",
67+
typeBufferCount,
68+
vector.getField().getType().toString(),
69+
fieldVector.getFieldBuffers().size());
70+
}
6371
}
6472
}
6573

@@ -158,7 +166,12 @@ public Void visit(BaseLargeVariableWidthVector vector, Void value) {
158166

159167
@Override
160168
public Void visit(BaseVariableWidthViewVector vector, Void value) {
161-
throw new UnsupportedOperationException("View vectors are not supported.");
169+
final int valueCount = vector.getValueCount();
170+
validateVectorCommon(vector);
171+
validateOrThrow(vector.getFieldBuffers().size() >= 2, "Expected at least 2 buffers.");
172+
validateValidityBuffer(vector, valueCount);
173+
validateDataBuffer(vector, (long) valueCount * BaseVariableWidthViewVector.ELEMENT_SIZE);
174+
return null;
162175
}
163176

164177
@Override

vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,8 @@ public Void visit(BaseLargeVariableWidthVector vector, Void value) {
121121

122122
@Override
123123
public Void visit(BaseVariableWidthViewVector vector, Void value) {
124-
throw new UnsupportedOperationException("View vectors are not supported.");
124+
vector.validateScalars();
125+
return null;
125126
}
126127

127128
@Override

vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@
6161
import org.apache.arrow.vector.ValueVector;
6262
import org.apache.arrow.vector.VarBinaryVector;
6363
import org.apache.arrow.vector.VarCharVector;
64+
import org.apache.arrow.vector.ViewVarBinaryVector;
65+
import org.apache.arrow.vector.ViewVarCharVector;
6466
import org.apache.arrow.vector.compare.VectorVisitor;
6567
import org.apache.arrow.vector.complex.DenseUnionVector;
6668
import org.apache.arrow.vector.complex.FixedSizeListVector;
@@ -380,7 +382,12 @@ public Void visit(BaseLargeVariableWidthVector vector, Void value) {
380382

381383
@Override
382384
public Void visit(BaseVariableWidthViewVector vector, Void value) {
383-
throw new UnsupportedOperationException("View vectors are not supported.");
385+
if (vector instanceof ViewVarCharVector) {
386+
validateVectorCommon(vector, ArrowType.Utf8View.class);
387+
} else if (vector instanceof ViewVarBinaryVector) {
388+
validateVectorCommon(vector, ArrowType.BinaryView.class);
389+
}
390+
return null;
384391
}
385392

386393
@Override

vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,13 @@ public Void visit(BaseLargeVariableWidthVector left, Void value) {
107107
}
108108

109109
@Override
110-
public Void visit(BaseVariableWidthViewVector left, Void value) {
111-
throw new UnsupportedOperationException("View vectors are not supported.");
110+
public Void visit(BaseVariableWidthViewVector vector, Void value) {
111+
if (vector.getValueCount() > 0) {
112+
if (vector.getDataBuffer() == null || vector.getDataBuffer().capacity() == 0) {
113+
throw new IllegalArgumentException("valueBuffer is null or capacity is 0");
114+
}
115+
}
116+
return null;
112117
}
113118

114119
@Override

vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
import org.apache.arrow.vector.util.ReusableByteArray;
6262
import org.apache.arrow.vector.util.Text;
6363
import org.apache.arrow.vector.util.TransferPair;
64+
import org.apache.arrow.vector.validate.ValidateUtil;
6465
import org.junit.jupiter.api.AfterEach;
6566
import org.junit.jupiter.api.BeforeEach;
6667
import org.junit.jupiter.api.Test;
@@ -2445,7 +2446,7 @@ public void testSplitAndTransferWithLongStringsOnValiditySplit() {
24452446
final ViewVarBinaryVector sourceVector =
24462447
newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) {
24472448
testSplitAndTransferOnValiditySplitHelper(
2448-
targetVector, sourceVector, startIndex, length, data);
2449+
targetVector, sourceVector, startIndex, length, binaryData);
24492450
}
24502451
}
24512452

@@ -2852,4 +2853,18 @@ public void testVectorLoadUnloadOnMixedTypes() {
28522853
}
28532854
}
28542855
}
2856+
2857+
@Test
2858+
public void testValidate() {
2859+
try (final ViewVarCharVector vector = new ViewVarCharVector("v", allocator)) {
2860+
vector.validateFull();
2861+
setVector(vector, STR1, STR2, STR3);
2862+
vector.validateFull();
2863+
2864+
vector.getDataBuffer().capacity(0);
2865+
ValidateUtil.ValidateException e =
2866+
assertThrows(ValidateUtil.ValidateException.class, () -> vector.validate());
2867+
assertTrue(e.getMessage().contains("Not enough capacity for data buffer"));
2868+
}
2869+
}
28552870
}

0 commit comments

Comments
 (0)