From 29632def2f5d9c90a593f0b3c9001347f02d4497 Mon Sep 17 00:00:00 2001 From: Aihua Xu Date: Thu, 21 Nov 2024 17:05:37 -0800 Subject: [PATCH 1/6] Add Variant logical type annotation to parquet-java --- .../parquet/schema/LogicalTypeAnnotation.java | 36 +++++++++++++++++++ .../parquet/schema/TestTypeBuilders.java | 21 +++++++++++ .../TestTypeBuildersWithLogicalTypes.java | 25 +++++++++++++ 3 files changed, 82 insertions(+) diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index 78b0f9a0c1..0e1b438b82 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -56,6 +56,12 @@ protected LogicalTypeAnnotation fromString(List params) { return listType(); } }, + VARIANT { + @Override + protected LogicalTypeAnnotation fromString(List params) { + return variantType(); + } + }, STRING { @Override protected LogicalTypeAnnotation fromString(List params) { @@ -269,6 +275,10 @@ public static ListLogicalTypeAnnotation listType() { return ListLogicalTypeAnnotation.INSTANCE; } + public static VariantLogicalTypeAnnotation variantType() { + return VariantLogicalTypeAnnotation.INSTANCE; + } + public static EnumLogicalTypeAnnotation enumType() { return EnumLogicalTypeAnnotation.INSTANCE; } @@ -1128,6 +1138,28 @@ public int hashCode() { } } + public static class VariantLogicalTypeAnnotation extends LogicalTypeAnnotation { + private static final VariantLogicalTypeAnnotation INSTANCE = new VariantLogicalTypeAnnotation(); + + private VariantLogicalTypeAnnotation() {} + + @Override + public OriginalType toOriginalType() { + // No OriginalType for Variant + return null; + } + + @Override + public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { + return logicalTypeAnnotationVisitor.visit(this); + } + + @Override + LogicalTypeToken getType() { + return LogicalTypeToken.VARIANT; + } + } + /** * Implement this interface to visit a logical type annotation in the schema. * The default implementation for each logical type specific visitor method is empty. @@ -1152,6 +1184,10 @@ default Optional visit(ListLogicalTypeAnnotation listLogicalType) { return empty(); } + default Optional visit(VariantLogicalTypeAnnotation variantLogicalType) { + return empty(); + } + default Optional visit(EnumLogicalTypeAnnotation enumLogicalType) { return empty(); } diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java index 579077897f..551f0a4859 100644 --- a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java +++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java @@ -50,6 +50,7 @@ import static org.apache.parquet.schema.Type.Repetition.OPTIONAL; import static org.apache.parquet.schema.Type.Repetition.REPEATED; import static org.apache.parquet.schema.Type.Repetition.REQUIRED; +import static org.junit.Assert.assertEquals; import java.util.ArrayList; import java.util.List; @@ -1414,6 +1415,26 @@ public void testTimestampLogicalTypeWithUTCParameter() { Assert.assertEquals(nonUtcMicrosExpected, nonUtcMicrosActual); } + @Test + public void testVariantLogicalType() { + String name = "variant_field"; + GroupType variantExpected = new GroupType( + REQUIRED, + name, + LogicalTypeAnnotation.variantType(), + new PrimitiveType(REQUIRED, BINARY, "metadata"), + new PrimitiveType(REQUIRED, BINARY, "value")); + + GroupType variantActual = Types.buildGroup(REQUIRED) + .addFields( + Types.required(BINARY).named("metadata"), + Types.required(BINARY).named("value")) + .as(LogicalTypeAnnotation.variantType()) + .named(name); + + assertEquals(variantExpected, variantActual); + } + @Test(expected = IllegalArgumentException.class) public void testDecimalLogicalTypeWithDeprecatedScaleMismatch() { Types.required(BINARY) diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java index 54853e8138..d2b12740f3 100644 --- a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java +++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java @@ -41,6 +41,8 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96; import static org.apache.parquet.schema.Type.Repetition.REQUIRED; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; import java.util.concurrent.Callable; import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; @@ -473,6 +475,29 @@ public void testFloat16LogicalType() { .toString()); } + @Test + public void testVariantLogicalType() { + String name = "variant_field"; + GroupType variant = new GroupType( + REQUIRED, + name, + LogicalTypeAnnotation.variantType(), + Types.required(BINARY).named("metadata"), + Types.required(BINARY).named("value")); + + assertEquals( + "required group variant_field (VARIANT) {\n" + + " required binary metadata;\n" + + " required binary value;\n" + + "}", + variant.toString()); + + LogicalTypeAnnotation annotation = variant.getLogicalTypeAnnotation(); + assertEquals(LogicalTypeAnnotation.LogicalTypeToken.VARIANT, annotation.getType()); + assertNull(annotation.toOriginalType()); + assertTrue(annotation instanceof LogicalTypeAnnotation.VariantLogicalTypeAnnotation); + } + /** * A convenience method to avoid a large number of @Test(expected=...) tests * From e7c97e6374e510a38217e3f7bb6aecbdf29a2fe5 Mon Sep 17 00:00:00 2001 From: Aihua Xu Date: Mon, 24 Mar 2025 17:56:14 -0700 Subject: [PATCH 2/6] Update to parquet-format 2.11.0 --- .../parquet/schema/LogicalTypeAnnotation.java | 3 ++ .../parquet/parser/TestParquetParser.java | 28 +++++++++++++++++++ .../apache/parquet/format/LogicalTypes.java | 1 + .../converter/ParquetMetadataConverter.java | 8 +++++- pom.xml | 2 +- 5 files changed, 40 insertions(+), 2 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index 0e1b438b82..6bd593ea52 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -59,6 +59,9 @@ protected LogicalTypeAnnotation fromString(List params) { VARIANT { @Override protected LogicalTypeAnnotation fromString(List params) { + Preconditions.checkArgument( + params.isEmpty(), "Expecting 0 parameter for variant logical type, got %d", params.size()); + return variantType(); } }, diff --git a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java index 04b4a9432a..1af37bfd54 100644 --- a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java +++ b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java @@ -18,6 +18,7 @@ */ package org.apache.parquet.parser; +import static org.apache.parquet.format.LogicalTypes.VARIANT; import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS; import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS; import static org.apache.parquet.schema.LogicalTypeAnnotation.intType; @@ -55,6 +56,7 @@ import static org.junit.Assert.assertEquals; import org.apache.parquet.schema.GroupType; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.MessageTypeParser; import org.apache.parquet.schema.OriginalType; @@ -447,4 +449,30 @@ public void testEmbeddedAnnotations() { MessageType reparsed = MessageTypeParser.parseMessageType(parsed.toString()); assertEquals(expected, reparsed); } + + @Test + public void testVARIANTAnnotation() { + String message = "message Message {\n" + + " required group aVariant (VARIANT) {\n" + + " required binary metadata;\n" + + " required binary value;\n" + + " }\n" + + "}\n"; + + MessageType expected = buildMessage() + .requiredGroup() + .as(LogicalTypeAnnotation.variantType()) + .required(BINARY) + .named("metadata") + .required(BINARY) + .named("value") + .named("aVariant") + .named("Message"); + + MessageType parsed = parseMessageType(message); + + assertEquals(expected, parsed); + MessageType reparsed = parseMessageType(parsed.toString()); + assertEquals(expected, reparsed); + } } diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java index b2d70c9247..c9f43644bd 100644 --- a/parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java +++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java @@ -53,4 +53,5 @@ public static LogicalType DECIMAL(int scale, int precision) { public static final LogicalType JSON = LogicalType.JSON(new JsonType()); public static final LogicalType BSON = LogicalType.BSON(new BsonType()); public static final LogicalType FLOAT16 = LogicalType.FLOAT16(new Float16Type()); + public static final LogicalType VARIANT = LogicalType.VARIANT(new VariantType()); } diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java index 87797d1fa5..c6e9637c8e 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java @@ -112,6 +112,7 @@ import org.apache.parquet.format.TypeDefinedOrder; import org.apache.parquet.format.UUIDType; import org.apache.parquet.format.Uncompressed; +import org.apache.parquet.format.VariantType; import org.apache.parquet.format.XxHash; import org.apache.parquet.hadoop.metadata.BlockMetaData; import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; @@ -516,7 +517,7 @@ public Optional visit(LogicalTypeAnnotation.Float16LogicalTypeAnnot } @Override - public Optional visit(LogicalTypeAnnotation.UnknownLogicalTypeAnnotation intervalLogicalType) { + public Optional visit(LogicalTypeAnnotation.UnknownLogicalTypeAnnotation unknownLogicalType) { return of(LogicalType.UNKNOWN(new NullType())); } @@ -524,6 +525,11 @@ public Optional visit(LogicalTypeAnnotation.UnknownLogicalTypeAnnot public Optional visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) { return of(LogicalType.UNKNOWN(new NullType())); } + + @Override + public Optional visit(LogicalTypeAnnotation.VariantLogicalTypeAnnotation variantLogicalType) { + return of(LogicalType.VARIANT(new VariantType())); + } } private void addRowGroup( diff --git a/pom.xml b/pom.xml index c81f6f9af5..cd4b8c7877 100644 --- a/pom.xml +++ b/pom.xml @@ -94,7 +94,7 @@ shaded.parquet 3.3.0 - 2.10.0 + 2.11.0 1.15.1 thrift ${thrift.executable} From 12b4061ea185c174d477514671f2692812a1d253 Mon Sep 17 00:00:00 2001 From: Aihua Xu Date: Mon, 24 Mar 2025 18:33:59 -0700 Subject: [PATCH 3/6] Add Variant in OriginalType --- .../org/apache/parquet/schema/LogicalTypeAnnotation.java | 5 +++-- .../main/java/org/apache/parquet/schema/OriginalType.java | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index 6bd593ea52..3f04b51e00 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -260,6 +260,8 @@ public static LogicalTypeAnnotation fromOriginalType(OriginalType originalType, return bsonType(); case MAP_KEY_VALUE: return MapKeyValueTypeAnnotation.getInstance(); + case VARIANT: + return variantType(); default: throw new RuntimeException( "Can't convert original type to logical type, unknown original type " + originalType); @@ -1148,8 +1150,7 @@ private VariantLogicalTypeAnnotation() {} @Override public OriginalType toOriginalType() { - // No OriginalType for Variant - return null; + return OriginalType.VARIANT; } @Override diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java b/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java index 6c27be3b87..9054728cc0 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java @@ -44,5 +44,6 @@ public enum OriginalType { INT_64, JSON, BSON, - INTERVAL + INTERVAL, + VARIANT } From f88fd3fce9e965d8fcffabfea526c14c945eb862 Mon Sep 17 00:00:00 2001 From: Aihua Xu Date: Tue, 25 Mar 2025 09:07:14 -0700 Subject: [PATCH 4/6] Revert "Add Variant in OriginalType" This reverts commit 12b4061ea185c174d477514671f2692812a1d253. --- .../org/apache/parquet/schema/LogicalTypeAnnotation.java | 5 ++--- .../main/java/org/apache/parquet/schema/OriginalType.java | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index 3f04b51e00..6bd593ea52 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -260,8 +260,6 @@ public static LogicalTypeAnnotation fromOriginalType(OriginalType originalType, return bsonType(); case MAP_KEY_VALUE: return MapKeyValueTypeAnnotation.getInstance(); - case VARIANT: - return variantType(); default: throw new RuntimeException( "Can't convert original type to logical type, unknown original type " + originalType); @@ -1150,7 +1148,8 @@ private VariantLogicalTypeAnnotation() {} @Override public OriginalType toOriginalType() { - return OriginalType.VARIANT; + // No OriginalType for Variant + return null; } @Override diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java b/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java index 9054728cc0..6c27be3b87 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java @@ -44,6 +44,5 @@ public enum OriginalType { INT_64, JSON, BSON, - INTERVAL, - VARIANT + INTERVAL } From a683f6a39311f2fc5b5f5d73047104e627a3c682 Mon Sep 17 00:00:00 2001 From: Aihua Xu Date: Tue, 25 Mar 2025 09:33:41 -0700 Subject: [PATCH 5/6] Add parsing logical type for GroupType --- .../parquet/schema/MessageTypeParser.java | 21 ++++++++++++++----- .../parquet/parser/TestParquetParser.java | 21 +++++++++---------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java b/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java index 2e6cb20963..63a0844955 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java @@ -118,12 +118,23 @@ private static void addGroupType(Tokenizer st, Repetition r, GroupBuilder bui String name = st.nextToken(); // Read annotation, if any. + String annotation = null; t = st.nextToken(); - OriginalType originalType = null; if (t.equalsIgnoreCase("(")) { - originalType = OriginalType.valueOf(st.nextToken()); - childBuilder.as(originalType); - check(st.nextToken(), ")", "original type ended by )", st); + t = st.nextToken(); + if (isLogicalType(t)) { + LogicalTypeAnnotation.LogicalTypeToken logicalType = LogicalTypeAnnotation.LogicalTypeToken.valueOf(t); + LogicalTypeAnnotation logicalTypeAnnotation = logicalType.fromString(new ArrayList<>()); + childBuilder.as(logicalTypeAnnotation); + annotation = logicalTypeAnnotation.toString(); + } else { + // Try to parse as OriginalType + OriginalType originalType = OriginalType.valueOf(t); + childBuilder.as(originalType); + annotation = originalType.toString(); + } + + check(st.nextToken(), ")", "logical type ended by )", st); t = st.nextToken(); } if (t.equals("=")) { @@ -134,7 +145,7 @@ private static void addGroupType(Tokenizer st, Repetition r, GroupBuilder bui addGroupTypeFields(t, st, childBuilder); } catch (IllegalArgumentException e) { throw new IllegalArgumentException( - "problem reading type: type = group, name = " + name + ", original type = " + originalType, e); + "problem reading type: type = group, name = " + name + ", annotation = " + annotation, e); } childBuilder.named(name); diff --git a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java index 1af37bfd54..5c466a3fed 100644 --- a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java +++ b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java @@ -18,7 +18,6 @@ */ package org.apache.parquet.parser; -import static org.apache.parquet.format.LogicalTypes.VARIANT; import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS; import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS; import static org.apache.parquet.schema.LogicalTypeAnnotation.intType; @@ -452,12 +451,12 @@ public void testEmbeddedAnnotations() { @Test public void testVARIANTAnnotation() { - String message = "message Message {\n" - + " required group aVariant (VARIANT) {\n" - + " required binary metadata;\n" - + " required binary value;\n" - + " }\n" - + "}\n"; + String message = "message Message {\n" + + " required group aVariant (VARIANT) {\n" + + " required binary metadata;\n" + + " required binary value;\n" + + " }\n" + + "}\n"; MessageType expected = buildMessage() .requiredGroup() @@ -469,10 +468,10 @@ public void testVARIANTAnnotation() { .named("aVariant") .named("Message"); - MessageType parsed = parseMessageType(message); + MessageType parsed = parseMessageType(message); - assertEquals(expected, parsed); - MessageType reparsed = parseMessageType(parsed.toString()); - assertEquals(expected, reparsed); + assertEquals(expected, parsed); + MessageType reparsed = parseMessageType(parsed.toString()); + assertEquals(expected, reparsed); } } From ba4bbdf60a49204bf8fc749e5f9d63e161e3cea6 Mon Sep 17 00:00:00 2001 From: Aihua Xu Date: Wed, 26 Mar 2025 10:45:45 -0700 Subject: [PATCH 6/6] Update variant spec version and add test --- .../parquet/schema/TestTypeBuilders.java | 24 ++++++++++++++ .../TestTypeBuildersWithLogicalTypes.java | 24 ++++++++++++++ .../apache/parquet/format/LogicalTypes.java | 7 ++++- .../parquet/format/TestLogicalTypes.java | 31 +++++++++++++++++++ .../converter/ParquetMetadataConverter.java | 6 ++-- 5 files changed, 88 insertions(+), 4 deletions(-) create mode 100644 parquet-format-structures/src/test/java/org/apache/parquet/format/TestLogicalTypes.java diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java index 551f0a4859..c934de9528 100644 --- a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java +++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java @@ -1435,6 +1435,30 @@ public void testVariantLogicalType() { assertEquals(variantExpected, variantActual); } + @Test + public void testVariantLogicalTypeWithShredded() { + String name = "variant_field"; + GroupType variantExpected = new GroupType( + REQUIRED, + name, + LogicalTypeAnnotation.variantType(), + new PrimitiveType(REQUIRED, BINARY, "metadata"), + new PrimitiveType(OPTIONAL, BINARY, "value"), + new PrimitiveType(OPTIONAL, BINARY, "typed_value", LogicalTypeAnnotation.stringType())); + + GroupType variantActual = Types.buildGroup(REQUIRED) + .addFields( + Types.required(BINARY).named("metadata"), + Types.optional(BINARY).named("value"), + Types.optional(BINARY) + .as(LogicalTypeAnnotation.stringType()) + .named("typed_value")) + .as(LogicalTypeAnnotation.variantType()) + .named(name); + + assertEquals(variantExpected, variantActual); + } + @Test(expected = IllegalArgumentException.class) public void testDecimalLogicalTypeWithDeprecatedScaleMismatch() { Types.required(BINARY) diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java index d2b12740f3..bb3d665201 100644 --- a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java +++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java @@ -498,6 +498,30 @@ public void testVariantLogicalType() { assertTrue(annotation instanceof LogicalTypeAnnotation.VariantLogicalTypeAnnotation); } + @Test + public void testVariantLogicalTypeWithShredded() { + String name = "variant_field"; + GroupType variant = new GroupType( + REQUIRED, + name, + LogicalTypeAnnotation.variantType(), + Types.required(BINARY).named("metadata"), + Types.optional(BINARY).named("value"), + Types.optional(BINARY).as(LogicalTypeAnnotation.stringType()).named("typed_value")); + + assertEquals( + "required group variant_field (VARIANT) {\n" + + " required binary metadata;\n" + + " optional binary value;\n" + + " optional binary typed_value (STRING);\n" + + "}", + variant.toString()); + + LogicalTypeAnnotation annotation = variant.getLogicalTypeAnnotation(); + assertEquals(LogicalTypeAnnotation.LogicalTypeToken.VARIANT, annotation.getType()); + assertNull(annotation.toOriginalType()); + assertTrue(annotation instanceof LogicalTypeAnnotation.VariantLogicalTypeAnnotation); + } /** * A convenience method to avoid a large number of @Test(expected=...) tests * diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java index c9f43644bd..5f247fc667 100644 --- a/parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java +++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java @@ -53,5 +53,10 @@ public static LogicalType DECIMAL(int scale, int precision) { public static final LogicalType JSON = LogicalType.JSON(new JsonType()); public static final LogicalType BSON = LogicalType.BSON(new BsonType()); public static final LogicalType FLOAT16 = LogicalType.FLOAT16(new Float16Type()); - public static final LogicalType VARIANT = LogicalType.VARIANT(new VariantType()); + + public static final LogicalType VARIANT() { + VariantType type = new VariantType(); + type.setSpecification_version((byte) 1); + return LogicalType.VARIANT(type); + } } diff --git a/parquet-format-structures/src/test/java/org/apache/parquet/format/TestLogicalTypes.java b/parquet-format-structures/src/test/java/org/apache/parquet/format/TestLogicalTypes.java new file mode 100644 index 0000000000..577f254b36 --- /dev/null +++ b/parquet-format-structures/src/test/java/org/apache/parquet/format/TestLogicalTypes.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.parquet.format; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +public class TestLogicalTypes { + @Test + public void testVariantLogicalTypeVersion() { + LogicalType variant = LogicalTypes.VARIANT(); + assertEquals(1, (variant.getVARIANT().getSpecification_version())); + } +} diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java index c6e9637c8e..0bf89c7fb5 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java @@ -89,6 +89,7 @@ import org.apache.parquet.format.KeyValue; import org.apache.parquet.format.ListType; import org.apache.parquet.format.LogicalType; +import org.apache.parquet.format.LogicalTypes; import org.apache.parquet.format.MapType; import org.apache.parquet.format.MicroSeconds; import org.apache.parquet.format.MilliSeconds; @@ -112,7 +113,6 @@ import org.apache.parquet.format.TypeDefinedOrder; import org.apache.parquet.format.UUIDType; import org.apache.parquet.format.Uncompressed; -import org.apache.parquet.format.VariantType; import org.apache.parquet.format.XxHash; import org.apache.parquet.hadoop.metadata.BlockMetaData; import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; @@ -517,7 +517,7 @@ public Optional visit(LogicalTypeAnnotation.Float16LogicalTypeAnnot } @Override - public Optional visit(LogicalTypeAnnotation.UnknownLogicalTypeAnnotation unknownLogicalType) { + public Optional visit(LogicalTypeAnnotation.UnknownLogicalTypeAnnotation intervalLogicalType) { return of(LogicalType.UNKNOWN(new NullType())); } @@ -528,7 +528,7 @@ public Optional visit(LogicalTypeAnnotation.IntervalLogicalTypeAnno @Override public Optional visit(LogicalTypeAnnotation.VariantLogicalTypeAnnotation variantLogicalType) { - return of(LogicalType.VARIANT(new VariantType())); + return of(LogicalTypes.VARIANT()); } }