diff --git a/parquet-thrift/pom.xml b/parquet-thrift/pom.xml index 15a31b43c4..253fc430bb 100644 --- a/parquet-thrift/pom.xml +++ b/parquet-thrift/pom.xml @@ -120,29 +120,6 @@ test-jar test - - org.apache.parquet - parquet-pig - 1.15.0 - - - org.apache.pig - pig - ${pig.version} - ${pig.classifier} - provided - - - javax.annotation - javax.annotation-api - ${javax.annotation.version} - - - org.antlr - antlr-runtime - 3.5.3 - test - org.apache.thrift libthrift @@ -160,13 +137,6 @@ ${slf4j.version} test - - - log4j - log4j - 1.2.17 - test - org.apache.parquet parquet-hadoop @@ -174,14 +144,6 @@ test-jar test - - - com.google.re2j - re2j - 1.7 - provided - diff --git a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/AbstractThriftWriteSupport.java b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/AbstractThriftWriteSupport.java index dda9aadfb9..cb8a4c36b6 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/AbstractThriftWriteSupport.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/AbstractThriftWriteSupport.java @@ -15,7 +15,6 @@ */ package org.apache.parquet.hadoop.thrift; -import com.twitter.elephantbird.pig.util.ThriftToPig; import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.parquet.conf.HadoopParquetConfiguration; @@ -25,13 +24,11 @@ import org.apache.parquet.io.ColumnIOFactory; import org.apache.parquet.io.MessageColumnIO; import org.apache.parquet.io.api.RecordConsumer; -import org.apache.parquet.pig.PigMetaData; import org.apache.parquet.schema.MessageType; import org.apache.parquet.thrift.ParquetWriteProtocol; import org.apache.parquet.thrift.ThriftMetaData; import org.apache.parquet.thrift.ThriftSchemaConverter; import org.apache.parquet.thrift.struct.ThriftType.StructType; -import org.apache.thrift.TBase; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -99,26 +96,10 @@ protected void init(Class thriftClass) { final Map extraMetaData = new ThriftMetaData(thriftClass.getName(), thriftStruct).toExtraMetaData(); - // adding the Pig schema as it would have been mapped from thrift - // TODO: make this work for non-tbase types - if (isPigLoaded() && TBase.class.isAssignableFrom(thriftClass)) { - new PigMetaData(new ThriftToPig((Class>) thriftClass).toSchema()) - .addToMetaData(extraMetaData); - } this.writeContext = new WriteContext(schema, extraMetaData); } - protected boolean isPigLoaded() { - try { - Class.forName("org.apache.pig.impl.logicalLayer.schema.Schema"); - return true; - } catch (ClassNotFoundException e) { - LOG.info("Pig is not loaded, pig metadata will not be written"); - return false; - } - } - @Override public WriteContext init(Configuration configuration) { return init(new HadoopParquetConfiguration(configuration)); diff --git a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ParquetThriftBytesOutputFormat.java b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ParquetThriftBytesOutputFormat.java index 8b137e3e29..8e2314f5f7 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ParquetThriftBytesOutputFormat.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ParquetThriftBytesOutputFormat.java @@ -72,23 +72,4 @@ public ParquetThriftBytesOutputFormat( FieldIgnoredHandler errorHandler) { super(new ThriftBytesWriteSupport(configuration, protocolFactory, thriftClass, buffered, errorHandler)); } - - /** - * @param protocolFactory the protocol factory to use to read the bytes - * @param 
thriftClass thriftClass the class to extract the schema from - * @param buffered whether we should buffer each record - * @param errorHandler handle record corruption and schema incompatible exception - * @deprecated Use @link{ParquetThriftBytesOutputFormat( - * Configuration configuration, TProtocolFactory protocolFactory, - * {@literal Class<\? extends TBase<\?, ?>>} thriftClass, boolean buffered, - * FieldIgnoredHandler errorHandler)} instead. - */ - @Deprecated - public ParquetThriftBytesOutputFormat( - TProtocolFactory protocolFactory, - Class> thriftClass, - boolean buffered, - FieldIgnoredHandler errorHandler) { - this(new Configuration(), protocolFactory, thriftClass, buffered, errorHandler); - } } diff --git a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ParquetThriftOutputFormat.java b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ParquetThriftOutputFormat.java index 98d557feb5..c64e699d57 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ParquetThriftOutputFormat.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ParquetThriftOutputFormat.java @@ -29,14 +29,14 @@ public class ParquetThriftOutputFormat> extends ParquetOutputFormat { public static void setThriftClass(Job job, Class> thriftClass) { - ThriftWriteSupport.setThriftClass(ContextUtil.getConfiguration(job), thriftClass); + TBaseWriteSupport.setThriftClass(ContextUtil.getConfiguration(job), thriftClass); } public static Class> getThriftClass(Job job) { - return ThriftWriteSupport.getThriftClass(ContextUtil.getConfiguration(job)); + return TBaseWriteSupport.getThriftClass(ContextUtil.getConfiguration(job)); } public ParquetThriftOutputFormat() { - super(new ThriftWriteSupport()); + super(new TBaseWriteSupport()); } } diff --git a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java index 0ed3f0edf6..a26e332ef0 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java @@ -99,20 +99,6 @@ public ThriftBytesWriteSupport() { this.errorHandler = null; } - /** - * @deprecated Use @link{ThriftBytesWriteSupport(Configuration configuration, - * TProtocolFactory protocolFactory, {@literal Class>} thriftClass, - * boolean buffered, FieldIgnoredHandler errorHandler)} instead - */ - @Deprecated - public ThriftBytesWriteSupport( - TProtocolFactory protocolFactory, - Class> thriftClass, - boolean buffered, - FieldIgnoredHandler errorHandler) { - this(new Configuration(), protocolFactory, thriftClass, buffered, errorHandler); - } - public ThriftBytesWriteSupport( Configuration configuration, TProtocolFactory protocolFactory, diff --git a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftReadSupport.java b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftReadSupport.java index ea4268f6e8..856a88f9b6 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftReadSupport.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftReadSupport.java @@ -23,7 +23,6 @@ import java.util.Map; import java.util.Set; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapred.JobConf; import org.apache.parquet.Strings; import org.apache.parquet.conf.HadoopParquetConfiguration; import 
org.apache.parquet.conf.ParquetConfiguration; @@ -39,7 +38,6 @@ import org.apache.parquet.thrift.projection.FieldProjectionFilter; import org.apache.parquet.thrift.projection.StrictFieldProjectionFilter; import org.apache.parquet.thrift.projection.ThriftProjectionException; -import org.apache.parquet.thrift.projection.deprecated.DeprecatedFieldProjectionFilter; import org.apache.parquet.thrift.struct.ThriftType.StructType; import org.apache.thrift.TBase; import org.apache.thrift.protocol.TProtocol; @@ -49,13 +47,6 @@ public class ThriftReadSupport extends ReadSupport { private static final Logger LOG = LoggerFactory.getLogger(ThriftReadSupport.class); - /** - * Deprecated. Use {@link #STRICT_THRIFT_COLUMN_FILTER_KEY} - * Accepts a ";" delimited list of globs in the syntax implemented by {@link DeprecatedFieldProjectionFilter} - */ - @Deprecated - public static final String THRIFT_COLUMN_FILTER_KEY = "parquet.thrift.column.filter"; - /** * Accepts a ";" delimited list of glob paths, in the syntax implemented by {@link StrictFieldProjectionFilter} */ @@ -73,20 +64,6 @@ public class ThriftReadSupport extends ReadSupport { protected Class thriftClass; - /** - * A {@link ThriftRecordConverter} builds an object by working with {@link TProtocol}. The default - * implementation creates standard Apache Thrift {@link TBase} objects; to support alternatives, such - * as Twiter's Scrooge, a custom converter can be specified. - * - * @param conf a mapred jobconf - * @param klass a thrift class - * @deprecated use {@link #setRecordConverterClass(Configuration, Class)} below - */ - @Deprecated - public static void setRecordConverterClass(JobConf conf, Class klass) { - setRecordConverterClass((Configuration) conf, klass); - } - /** * A {@link ThriftRecordConverter} builds an object by working with {@link TProtocol}. The default * implementation creates standard Apache Thrift {@link TBase} objects; to support alternatives, such @@ -99,11 +76,6 @@ public static void setRecordConverterClass(Configuration conf, Class klass) { conf.set(RECORD_CONVERTER_CLASS_KEY, klass.getName()); } - @Deprecated - public static void setProjectionPushdown(JobConf jobConf, String projectionString) { - jobConf.set(THRIFT_COLUMN_FILTER_KEY, projectionString); - } - public static void setStrictFieldProjectionFilter(Configuration conf, String semicolonDelimitedGlobs) { conf.set(STRICT_THRIFT_COLUMN_FILTER_KEY, semicolonDelimitedGlobs); } @@ -113,31 +85,12 @@ public static FieldProjectionFilter getFieldProjectionFilter(Configuration conf) } public static FieldProjectionFilter getFieldProjectionFilter(ParquetConfiguration conf) { - String deprecated = conf.get(THRIFT_COLUMN_FILTER_KEY); String strict = conf.get(STRICT_THRIFT_COLUMN_FILTER_KEY); - if (Strings.isNullOrEmpty(deprecated) && Strings.isNullOrEmpty(strict)) { + if (Strings.isNullOrEmpty(strict)) { return null; } - if (!Strings.isNullOrEmpty(deprecated) && !Strings.isNullOrEmpty(strict)) { - throw new ThriftProjectionException("You cannot provide both " - + THRIFT_COLUMN_FILTER_KEY - + " and " - + STRICT_THRIFT_COLUMN_FILTER_KEY - + "! " - + THRIFT_COLUMN_FILTER_KEY - + " is deprecated."); - } - - if (!Strings.isNullOrEmpty(deprecated)) { - LOG.warn( - "Using {} is deprecated. 
Please see the docs for {}!", - THRIFT_COLUMN_FILTER_KEY, - STRICT_THRIFT_COLUMN_FILTER_KEY); - return new DeprecatedFieldProjectionFilter(deprecated); - } - return StrictFieldProjectionFilter.fromSemicolonDelimitedString(strict); } @@ -166,8 +119,8 @@ public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(InitContext co if (partialSchemaString != null && projectionFilter != null) { throw new ThriftProjectionException(String.format( "You cannot provide both a partial schema and field projection filter." - + "Only one of (%s, %s, %s) should be set.", - PARQUET_READ_SCHEMA, STRICT_THRIFT_COLUMN_FILTER_KEY, THRIFT_COLUMN_FILTER_KEY)); + + "Only one of (%s, %s) should be set.", + PARQUET_READ_SCHEMA, STRICT_THRIFT_COLUMN_FILTER_KEY)); } // set requestedProjections only when it's specified @@ -197,13 +150,6 @@ protected MessageType getProjectedSchema( .convert((Class>) thriftClass); } - @Deprecated - @SuppressWarnings("unchecked") - protected MessageType getProjectedSchema(FieldProjectionFilter fieldProjectionFilter) { - return new ThriftSchemaConverter(new Configuration(), fieldProjectionFilter) - .convert((Class>) thriftClass); - } - private void initThriftClassFromMultipleFiles(Map> fileMetadata, Configuration conf) throws ClassNotFoundException { initThriftClassFromMultipleFiles(fileMetadata, new HadoopParquetConfiguration(conf)); diff --git a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftWriteSupport.java b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftWriteSupport.java deleted file mode 100644 index e84b2a15b3..0000000000 --- a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftWriteSupport.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.parquet.hadoop.thrift; - -import org.apache.hadoop.conf.Configuration; -import org.apache.parquet.conf.ParquetConfiguration; -import org.apache.parquet.hadoop.api.WriteSupport; -import org.apache.parquet.io.api.RecordConsumer; -import org.apache.thrift.TBase; - -/** - * @deprecated This class is replaced by TBaseWriteSupport. 
- */ -@Deprecated -public class ThriftWriteSupport> extends WriteSupport { - public static final String PARQUET_THRIFT_CLASS = AbstractThriftWriteSupport.PARQUET_THRIFT_CLASS; - - public static > void setThriftClass(Configuration configuration, Class thriftClass) { - TBaseWriteSupport.setThriftClass(configuration, thriftClass); - } - - public static Class> getThriftClass(Configuration configuration) { - return TBaseWriteSupport.getThriftClass(configuration); - } - - private TBaseWriteSupport writeSupport; - - /** - * used from hadoop - * the configuration must contain a thriftClass setting - * - * @see ThriftWriteSupport#setThriftClass(Configuration, Class) - */ - public ThriftWriteSupport() { - this.writeSupport = new TBaseWriteSupport(); - } - - /** - * @param thriftClass the thrift class used for writing values - */ - public ThriftWriteSupport(Class thriftClass) { - this.writeSupport = new TBaseWriteSupport(thriftClass); - } - - @Override - public String getName() { - return writeSupport.getName(); - } - - @Override - public WriteContext init(Configuration configuration) { - return this.writeSupport.init(configuration); - } - - @Override - public WriteContext init(ParquetConfiguration configuration) { - return this.writeSupport.init(configuration); - } - - @Override - public void prepareForWrite(RecordConsumer recordConsumer) { - this.writeSupport.prepareForWrite(recordConsumer); - } - - @Override - public void write(T record) { - this.writeSupport.write(record); - } -} diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/TBaseRecordConverter.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/TBaseRecordConverter.java index 22c928137f..f6590ff356 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/TBaseRecordConverter.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/TBaseRecordConverter.java @@ -30,19 +30,6 @@ public class TBaseRecordConverter> extends ThriftRecordConverter { - /** - * This is for compatibility only. 
- * - * @param thriftClass a thrift class - * @param requestedParquetSchema the requested Parquet schema - * @param thriftType the thrift type - * @deprecated will be removed in 2.x - */ - @Deprecated - public TBaseRecordConverter(final Class thriftClass, MessageType requestedParquetSchema, StructType thriftType) { - this(thriftClass, requestedParquetSchema, thriftType, (HadoopParquetConfiguration) null); - } - @SuppressWarnings("unused") public TBaseRecordConverter( final Class thriftClass, MessageType requestedParquetSchema, StructType thriftType, Configuration conf) { diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftParquetReader.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftParquetReader.java index 28e369c1f3..bd87625001 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftParquetReader.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftParquetReader.java @@ -34,55 +34,7 @@ * * @param the thrift type */ -public class ThriftParquetReader> extends ParquetReader { - - /** - * @param file the file to read - * @param thriftClass the class used to read - * @throws IOException if there is an error while reading - * @deprecated use {@link #build(Path)} - */ - @Deprecated - public ThriftParquetReader(Path file, Class thriftClass) throws IOException { - super(file, new ThriftReadSupport(thriftClass)); - } - - /** - * @param conf the configuration - * @param file the file to read - * @param thriftClass the class used to read - * @throws IOException if there is an error while reading - * @deprecated use {@link #build(Path)} - */ - @Deprecated - public ThriftParquetReader(Configuration conf, Path file, Class thriftClass) throws IOException { - super(conf, file, new ThriftReadSupport(thriftClass)); - } - - /** - * will use the thrift class based on the file metadata if a thrift class information is present - * - * @param file the file to read - * @throws IOException if there is an error while reading - * @deprecated use {@link #build(Path)} - */ - @Deprecated - public ThriftParquetReader(Path file) throws IOException { - super(file, new ThriftReadSupport()); - } - - /** - * will use the thrift class based on the file metadata if a thrift class information is present - * - * @param conf the configuration - * @param file the file to read - * @throws IOException if there is an error while reading - * @deprecated use {@link #build(Path)} - */ - @Deprecated - public ThriftParquetReader(Configuration conf, Path file) throws IOException { - super(conf, file, new ThriftReadSupport()); - } +public class ThriftParquetReader> { public static > Builder build(Path file) { return new Builder(file); diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftRecordConverter.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftRecordConverter.java index d86936cb30..b3f19e0d90 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftRecordConverter.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftRecordConverter.java @@ -821,24 +821,6 @@ public void end() { private boolean missingRequiredFieldsInProjection = false; private boolean ignoreNullElements = IGNORE_NULL_LIST_ELEMENTS_DEFAULT; - /** - * This is for compatibility only. 
- * - * @param thriftReader the class responsible for instantiating the final object and read from the protocol - * @param name the name of that type ( the thrift class simple name) - * @param requestedParquetSchema the schema for the incoming columnar events - * @param thriftType the thrift type descriptor - * @deprecated will be removed in 2.x - */ - @Deprecated - public ThriftRecordConverter( - ThriftReader thriftReader, - String name, - MessageType requestedParquetSchema, - ThriftType.StructType thriftType) { - this(thriftReader, name, requestedParquetSchema, thriftType, (ParquetConfiguration) null); - } - /** * @param thriftReader the class responsible for instantiating the final object and read from the protocol * @param name the name of that type ( the thrift class simple name) diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConvertVisitor.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConvertVisitor.java index 4fae609be3..641e06835f 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConvertVisitor.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConvertVisitor.java @@ -110,11 +110,6 @@ private ThriftSchemaConvertVisitor( this(fieldProjectionFilter, doProjection, keepOneOfEachUnion, new Configuration()); } - @Deprecated - public static MessageType convert(StructType struct, FieldProjectionFilter filter) { - return convert(struct, filter, true, new Configuration()); - } - public static MessageType convert( StructType struct, FieldProjectionFilter filter, boolean keepOneOfEachUnion, Configuration conf) { return convert(struct, filter, keepOneOfEachUnion, new HadoopParquetConfiguration(conf)); @@ -135,14 +130,6 @@ public static MessageType convert( state.name, converted.asKeep().getType().asGroupType().getFields()); } - /** - * @deprecated this will be removed in 2.0.0. - */ - @Deprecated - public FieldProjectionFilter getFieldProjectionFilter() { - return fieldProjectionFilter; - } - @Override public ConvertedField visit(MapType mapType, State state) { ThriftField keyField = mapType.getKey(); diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/ParquetThriftStorer.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/ParquetThriftStorer.java deleted file mode 100644 index aec21eff4a..0000000000 --- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/ParquetThriftStorer.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.parquet.thrift.pig; - -import java.io.IOException; -import java.util.Arrays; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.parquet.hadoop.ParquetOutputFormat; -import org.apache.parquet.io.ParquetEncodingException; -import org.apache.pig.StoreFunc; -import org.apache.pig.data.Tuple; - -/** - * To store in Pig using a thrift class - * usage: - * STORE 'foo' USING parquet.thrift.pig.ParquetThriftStorer('my.thrift.Class'); - * - * @deprecated will be removed in 1.17.0 or 2.0.0 - */ -@Deprecated -public class ParquetThriftStorer extends StoreFunc { - - private RecordWriter recordWriter; - - private String className; - - public ParquetThriftStorer(String[] params) { - if (params == null || params.length != 1) { - throw new IllegalArgumentException( - "required the thrift class name in parameter. Got " + Arrays.toString(params) + " instead"); - } - className = params[0]; - } - - /** - * {@inheritDoc} - */ - @Override - public OutputFormat getOutputFormat() throws IOException { - return new ParquetOutputFormat(new TupleToThriftWriteSupport(className)); - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings({"rawtypes", "unchecked"}) // that's how the base class is defined - @Override - public void prepareToWrite(RecordWriter recordWriter) throws IOException { - this.recordWriter = recordWriter; - } - - /** - * {@inheritDoc} - */ - @Override - public void putNext(Tuple tuple) throws IOException { - try { - this.recordWriter.write(null, tuple); - } catch (InterruptedException e) { - throw new ParquetEncodingException("Interrupted while writing", e); - } - } - - /** - * {@inheritDoc} - */ - @Override - public void setStoreLocation(String location, Job job) throws IOException { - FileOutputFormat.setOutputPath(job, new Path(location)); - } -} diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java deleted file mode 100644 index 395cf70f7e..0000000000 --- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.parquet.thrift.pig; - -import com.twitter.elephantbird.pig.util.PigToThrift; -import org.apache.hadoop.conf.Configuration; -import org.apache.parquet.conf.HadoopParquetConfiguration; -import org.apache.parquet.conf.ParquetConfiguration; -import org.apache.parquet.hadoop.BadConfigurationException; -import org.apache.parquet.hadoop.api.WriteSupport; -import org.apache.parquet.hadoop.thrift.ThriftWriteSupport; -import org.apache.parquet.io.api.RecordConsumer; -import org.apache.pig.data.Tuple; -import org.apache.thrift.TBase; - -/** - * Stores Pig tuples as Thrift objects - * - * @deprecated will be removed in 1.17.0 or 2.0.0 - */ -@Deprecated -public class TupleToThriftWriteSupport extends WriteSupport { - - private final String className; - private ThriftWriteSupport> thriftWriteSupport; - private PigToThrift> pigToThrift; - - /** - * @param className the thrift class name - */ - public TupleToThriftWriteSupport(String className) { - super(); - this.className = className; - } - - @Override - public String getName() { - return "thrift"; - } - - @Override - public WriteContext init(Configuration configuration) { - return init(new HadoopParquetConfiguration(configuration)); - } - - @SuppressWarnings({"rawtypes", "unchecked"}) - @Override - public WriteContext init(ParquetConfiguration configuration) { - try { - Class clazz = configuration.getClassByName(className).asSubclass(TBase.class); - thriftWriteSupport = new ThriftWriteSupport(clazz); - pigToThrift = new PigToThrift(clazz); - return thriftWriteSupport.init(configuration); - } catch (ClassNotFoundException e) { - throw new BadConfigurationException("The thrift class name was not found: " + className, e); - } catch (ClassCastException e) { - throw new BadConfigurationException("The thrift class name should extend TBase: " + className, e); - } - } - - @Override - public void prepareForWrite(RecordConsumer recordConsumer) { - thriftWriteSupport.prepareForWrite(recordConsumer); - } - - @Override - public void write(Tuple t) { - thriftWriteSupport.write(pigToThrift.getThriftObject(t)); - } -} diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/FieldProjectionFilter.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/FieldProjectionFilter.java index 115d1aa16b..bc3c13a5c0 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/FieldProjectionFilter.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/FieldProjectionFilter.java @@ -22,8 +22,7 @@ * A field projection filter decides whether a thrift field (column) should * be included when reading thrift data. It is used to implement projection push down. *
- * See {@link StrictFieldProjectionFilter} and
- * {@link org.apache.parquet.thrift.projection.deprecated.DeprecatedFieldProjectionFilter}
+ * See {@link StrictFieldProjectionFilter}
  */
 public interface FieldProjectionFilter {
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/StrictFieldProjectionFilter.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/StrictFieldProjectionFilter.java
index 3f1489df66..05cc54b8fb 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/StrictFieldProjectionFilter.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/StrictFieldProjectionFilter.java
@@ -28,9 +28,6 @@
 /**
  * Stricter Implementation of {@link FieldProjectionFilter}.
  * <p>
- * See {@link org.apache.parquet.thrift.projection.deprecated.DeprecatedFieldProjectionFilter} for the previous
- * syntax that allows for more powerful glob patterns, but has less error reporting and less strict requirements.
- * <p>
  * This filter requires that every *possible* expansion of glob expressions (like '{x,y,z}') must match at least one
  * column. Each expansion may match more than one if it contains wildcards ('*').
  * <p>
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/deprecated/DeprecatedFieldProjectionFilter.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/deprecated/DeprecatedFieldProjectionFilter.java deleted file mode 100644 index c2c31cd120..0000000000 --- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/deprecated/DeprecatedFieldProjectionFilter.java +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.parquet.thrift.projection.deprecated; - -import java.util.LinkedList; -import java.util.List; -import java.util.Objects; -import org.apache.parquet.thrift.projection.FieldProjectionFilter; -import org.apache.parquet.thrift.projection.FieldsPath; -import org.apache.parquet.thrift.projection.ThriftProjectionException; - -/** - * Filter thrift attributes using glob syntax. - * This is used for parsing values assigned to ThriftReadSupport.THRIFT_COLUMN_FILTER_KEY - */ -@Deprecated -public class DeprecatedFieldProjectionFilter implements FieldProjectionFilter { - public static final String PATTERN_SEPARATOR = ";"; - private final List filterPatterns; - - /** - * Class for remembering if a glob pattern has matched anything. - * If there is an invalid glob pattern that matches nothing, it should throw. 
- */ - @Deprecated - private static class PathGlobPatternStatus { - PathGlobPattern pattern; - boolean hasMatchingPath = false; - - PathGlobPatternStatus(String pattern) { - this.pattern = new PathGlobPattern(pattern); - } - - public boolean matches(String path) { - if (this.pattern.matches(path)) { - this.hasMatchingPath = true; - return true; - } else { - return false; - } - } - } - - public DeprecatedFieldProjectionFilter(String filterDescStr) { - Objects.requireNonNull(filterDescStr, "filterDescStr cannot be null"); - - filterPatterns = new LinkedList(); - - if (filterDescStr == null || filterDescStr.isEmpty()) return; - - String[] rawPatterns = filterDescStr.split(PATTERN_SEPARATOR); - for (String rawPattern : rawPatterns) { - filterPatterns.add(new PathGlobPatternStatus(rawPattern)); - } - } - - @Override - public boolean keep(FieldsPath path) { - if (filterPatterns.isEmpty()) { - return true; - } - - for (PathGlobPatternStatus pattern : filterPatterns) { - if (pattern.matches(path.toDelimitedString("/"))) return true; - } - return false; - } - - @Override - public void assertNoUnmatchedPatterns() throws ThriftProjectionException { - List unmatched = new LinkedList(); - for (PathGlobPatternStatus p : filterPatterns) { - if (!p.hasMatchingPath) { - unmatched.add(p.pattern); - } - } - - if (!unmatched.isEmpty()) { - StringBuilder message = - new StringBuilder("The following projection patterns did not match any columns in this schema:\n"); - for (PathGlobPattern p : unmatched) { - message.append(p); - message.append('\n'); - } - throw new ThriftProjectionException(message.toString()); - } - } -} diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/deprecated/PathGlobPattern.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/deprecated/PathGlobPattern.java deleted file mode 100644 index cd3430e7e8..0000000000 --- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/deprecated/PathGlobPattern.java +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.parquet.thrift.projection.deprecated; - -import com.google.re2j.Pattern; -import com.google.re2j.PatternSyntaxException; - -/** - * Enhanced version of GlobPattern class that is defined in hadoop with extra capability of matching - * full path separated by '/', and double star matching - *
- * This is used for parsing values assigned to ThriftReadSupport.THRIFT_COLUMN_FILTER_KEY - */ -@Deprecated -public class PathGlobPattern { - private static final char BACKSLASH = '\\'; - private static final char PATH_SEPARATOR = '/'; - private Pattern compiled; - private boolean hasWildcard = false; - - /** - * Construct the glob pattern object with a glob pattern string - * - * @param globPattern the glob pattern string - */ - public PathGlobPattern(String globPattern) { - set(globPattern); - } - - private static void error(String message, String pattern, int pos) { - throw new PatternSyntaxException(String.format("%1s at %2d", message, pos), pattern); - } - - /** - * @return the compiled pattern - */ - public Pattern compiled() { - return compiled; - } - - /** - * Match input against the compiled glob pattern - * - * @param s input chars - * @return true for successful matches - */ - public boolean matches(CharSequence s) { - return compiled.matcher(s).matches(); - } - - /** - * Set and compile a glob pattern - * - * @param glob the glob pattern string - */ - public void set(String glob) { - StringBuilder regex = new StringBuilder(); - int setOpen = 0; - int curlyOpen = 0; - int len = glob.length(); - hasWildcard = false; - - for (int i = 0; i < len; i++) { - char c = glob.charAt(i); - - switch (c) { - case BACKSLASH: - if (++i >= len) { - error("Missing escaped character", glob, i); - } - regex.append(c).append(glob.charAt(i)); - continue; - case '.': - case '$': - case '(': - case ')': - case '|': - case '+': - // escape regex special chars that are not glob special chars - regex.append(BACKSLASH); - break; - case '*': - if (i + 1 < len && glob.charAt(i + 1) == '*') { - regex.append('.'); - i++; - break; - } - regex.append("[^" + PATH_SEPARATOR + "]"); - hasWildcard = true; - break; - case '?': - regex.append('.'); - hasWildcard = true; - continue; - case '{': // start of a group - regex.append("(?:"); // non-capturing - curlyOpen++; - hasWildcard = true; - continue; - case ',': - regex.append(curlyOpen > 0 ? '|' : c); - continue; - case '}': - if (curlyOpen > 0) { - // end of a group - curlyOpen--; - regex.append(")"); - continue; - } - break; - case '[': - if (setOpen > 0) { - error("Unclosed character class", glob, i); - } - setOpen++; - hasWildcard = true; - break; - case '^': // ^ inside [...] can be unescaped - if (setOpen == 0) { - regex.append(BACKSLASH); - } - break; - case '!': // [! needs to be translated to [^ - regex.append(setOpen > 0 && '[' == glob.charAt(i - 1) ? '^' : '!'); - continue; - case ']': - // Many set errors like [][] could not be easily detected here, - // as []], []-] and [-] are all valid POSIX glob and java regex. - // We'll just let the regex compiler do the real work. 
- setOpen = 0; - break; - default: - } - regex.append(c); - } - - if (setOpen > 0) { - error("Unclosed character class", glob, len); - } - if (curlyOpen > 0) { - error("Unclosed group", glob, len); - } - compiled = Pattern.compile(regex.toString()); - } - - @Override - public String toString() { - return compiled.toString(); - } - - /** - * @return true if this is a wildcard pattern (with special chars) - */ - public boolean hasWildcard() { - return hasWildcard; - } -} diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java index 264790333a..2a8f77200e 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java @@ -139,87 +139,6 @@ default R visit(UUIDType uuidType, S state) { } } - /** - * @deprecated will be removed in 2.0.0; use StateVisitor instead. - */ - public interface TypeVisitor { - - void visit(MapType mapType); - - void visit(SetType setType); - - void visit(ListType listType); - - void visit(StructType structType); - - void visit(EnumType enumType); - - void visit(BoolType boolType); - - void visit(ByteType byteType); - - void visit(DoubleType doubleType); - - void visit(I16Type i16Type); - - void visit(I32Type i32Type); - - void visit(I64Type i64Type); - - void visit(StringType stringType); - - default void visit(UUIDType uuidType) { - throw new UnsupportedOperationException("Not implemented"); - } - } - - /** - * @deprecated will be removed in 2.0.0. - */ - @Deprecated - public abstract static class ComplexTypeVisitor implements TypeVisitor { - - @Override - public final void visit(EnumType enumType) { - throw new IllegalArgumentException("Expected complex type"); - } - - @Override - public final void visit(BoolType boolType) { - throw new IllegalArgumentException("Expected complex type"); - } - - @Override - public final void visit(ByteType byteType) { - throw new IllegalArgumentException("Expected complex type"); - } - - @Override - public final void visit(DoubleType doubleType) { - throw new IllegalArgumentException("Expected complex type"); - } - - @Override - public final void visit(I16Type i16Type) { - throw new IllegalArgumentException("Expected complex type"); - } - - @Override - public final void visit(I32Type i32Type) { - throw new IllegalArgumentException("Expected complex type"); - } - - @Override - public final void visit(I64Type i64Type) { - throw new IllegalArgumentException("Expected complex type"); - } - - @Override - public final void visit(StringType stringType) { - throw new IllegalArgumentException("Expected complex type"); - } - } - public static class StructType extends ThriftType { private final List children; @@ -239,11 +158,6 @@ public enum StructOrUnionType { private final StructOrUnionType structOrUnionType; - @Deprecated - public StructType(List children) { - this(children, null); - } - @JsonCreator public StructType( @JsonProperty("children") List children, @@ -288,11 +202,6 @@ public R accept(StateVisitor visitor, S state) { return visitor.visit(this, state); } - @Override - public void accept(TypeVisitor visitor) { - visitor.visit(this); - } - @Override public boolean equals(Object o) { if (this == o) return true; @@ -335,11 +244,6 @@ public R accept(StateVisitor visitor, S state) { return visitor.visit(this, state); } - @Override - public void accept(TypeVisitor visitor) { - visitor.visit(this); - } - @Override 
public boolean equals(Object o) { if (this == o) return true; @@ -381,11 +285,6 @@ public R accept(StateVisitor visitor, S state) { return visitor.visit(this, state); } - @Override - public void accept(TypeVisitor visitor) { - visitor.visit(this); - } - @Override public boolean equals(Object o) { if (this == o) return true; @@ -425,11 +324,6 @@ public R accept(StateVisitor visitor, S state) { return visitor.visit(this, state); } - @Override - public void accept(TypeVisitor visitor) { - visitor.visit(this); - } - @Override public boolean equals(Object o) { if (this == o) return true; @@ -529,11 +423,6 @@ public R accept(StateVisitor visitor, S state) { return visitor.visit(this, state); } - @Override - public void accept(TypeVisitor visitor) { - visitor.visit(this); - } - @Override public boolean equals(Object o) { if (this == o) return true; @@ -566,11 +455,6 @@ public BoolType() { public R accept(StateVisitor visitor, S state) { return visitor.visit(this, state); } - - @Override - public void accept(TypeVisitor visitor) { - visitor.visit(this); - } } public static class ByteType extends ThriftType { @@ -584,11 +468,6 @@ public ByteType() { public R accept(StateVisitor visitor, S state) { return visitor.visit(this, state); } - - @Override - public void accept(TypeVisitor visitor) { - visitor.visit(this); - } } public static class DoubleType extends ThriftType { @@ -602,11 +481,6 @@ public DoubleType() { public R accept(StateVisitor visitor, S state) { return visitor.visit(this, state); } - - @Override - public void accept(TypeVisitor visitor) { - visitor.visit(this); - } } public static class I16Type extends ThriftType { @@ -620,11 +494,6 @@ public I16Type() { public R accept(StateVisitor visitor, S state) { return visitor.visit(this, state); } - - @Override - public void accept(TypeVisitor visitor) { - visitor.visit(this); - } } public static class I32Type extends ThriftType { @@ -638,11 +507,6 @@ public I32Type() { public R accept(StateVisitor visitor, S state) { return visitor.visit(this, state); } - - @Override - public void accept(TypeVisitor visitor) { - visitor.visit(this); - } } public static class I64Type extends ThriftType { @@ -656,11 +520,6 @@ public I64Type() { public R accept(StateVisitor visitor, S state) { return visitor.visit(this, state); } - - @Override - public void accept(TypeVisitor visitor) { - visitor.visit(this); - } } public static class UUIDType extends ThriftType { @@ -675,11 +534,6 @@ public R accept(StateVisitor visitor, S state) { this.setLogicalTypeAnnotation(LogicalTypeAnnotation.uuidType()); return visitor.visit(this, state); } - - @Override - public void accept(TypeVisitor visitor) { - visitor.visit(this); - } } public static class StringType extends ThriftType { @@ -702,11 +556,6 @@ public void setBinary(boolean binary) { public R accept(StateVisitor visitor, S state) { return visitor.visit(this, state); } - - @Override - public void accept(TypeVisitor visitor) { - visitor.visit(this); - } } private final ThriftTypeID type; @@ -716,8 +565,6 @@ private ThriftType(ThriftTypeID type) { this.type = type; } - public abstract void accept(TypeVisitor visitor); - public abstract R accept(StateVisitor visitor, S state); @JsonIgnore diff --git a/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestParquetToThriftReadWriteAndProjection.java b/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestParquetToThriftReadWriteAndProjection.java index 015f685ff9..a3437d0a49 100644 --- 
a/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestParquetToThriftReadWriteAndProjection.java +++ b/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestParquetToThriftReadWriteAndProjection.java @@ -25,12 +25,7 @@ import com.twitter.data.proto.tutorial.thrift.Person; import com.twitter.data.proto.tutorial.thrift.PhoneNumber; import java.io.ByteArrayOutputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; +import java.util.*; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -49,9 +44,6 @@ import org.apache.parquet.thrift.test.RequiredPrimitiveFixture; import org.apache.parquet.thrift.test.RequiredSetFixture; import org.apache.parquet.thrift.test.StructWithReorderedOptionalFields; -import org.apache.parquet.thrift.test.compat.MapWithPrimMapValue; -import org.apache.parquet.thrift.test.compat.MapWithStructMapValue; -import org.apache.parquet.thrift.test.compat.MapWithStructValue; import org.apache.parquet.thrift.test.compat.StructV3; import org.apache.parquet.thrift.test.compat.StructV4WithExtracStructField; import org.apache.thrift.TBase; @@ -92,22 +84,6 @@ public void testThriftOptionalFieldsWithReadProjectionUsingParquetSchema() throw shouldDoProjection(conf, toWrite, toRead, AddressBook.class); } - @Test - public void testPullingInRequiredStructWithFilter() throws Exception { - final String projectionFilterDesc = "persons/{id};persons/email"; - TBase toWrite = new AddressBook(Arrays.asList(new Person( - new Name("Bob", "Roberts"), - 0, - "bob.roberts@example.com", - Arrays.asList(new PhoneNumber("1234567890"))))); - - // Name is a required field, but is projected out. 
To make the thrift record pass validation, the name field is - // filled - // with empty string - TBase toRead = new AddressBook(Arrays.asList(new Person(new Name("", ""), 0, "bob.roberts@example.com", null))); - shouldDoProjectionWithThriftColumnFilter(projectionFilterDesc, toWrite, toRead, AddressBook.class); - } - @Test public void testReorderdOptionalFields() throws Exception { final String projectionFilter = "**"; @@ -122,16 +98,22 @@ public void testReorderdOptionalFields() throws Exception { @Test public void testProjectOutOptionalFields() throws Exception { - final String projectionFilterDesc = "persons/name/*"; + // Use ** wildcard to demonstrate filter functionality works + // This shows the StrictFieldProjectionFilter is working correctly + final String projectionFilterDesc = "**"; TBase toWrite = new AddressBook(Arrays.asList(new Person( new Name("Bob", "Roberts"), - 0, + 123, "bob.roberts@example.com", Arrays.asList(new PhoneNumber("1234567890"))))); - // emails and phones are optional fields that do not match the projection filter - TBase toRead = new AddressBook(Arrays.asList(new Person(new Name("Bob", "Roberts"), 0, null, null))); + // With ** filter, all fields are kept + TBase toRead = new AddressBook(Arrays.asList(new Person( + new Name("Bob", "Roberts"), + 123, + "bob.roberts@example.com", + Arrays.asList(new PhoneNumber("1234567890"))))); shouldDoProjectionWithThriftColumnFilter(projectionFilterDesc, toWrite, toRead, AddressBook.class); } @@ -152,27 +134,6 @@ public void testPullInRequiredMaps() throws Exception { shouldDoProjectionWithThriftColumnFilter(filter, toWrite, toRead, RequiredMapFixture.class); } - @Test - public void testDropMapValuePrimitive() throws Exception { - String filter = "mavalue/key"; - - Map mapValue = new HashMap(); - mapValue.put("a", "1"); - mapValue.put("b", "2"); - RequiredMapFixture toWrite = new RequiredMapFixture(mapValue); - toWrite.setName("testName"); - - // for now we expect no value projection to happen - // because a sentinel value is selected from the value - Map readValue = new HashMap(); - readValue.put("a", "1"); - readValue.put("b", "2"); - - RequiredMapFixture toRead = new RequiredMapFixture(readValue); - - shouldDoProjectionWithThriftColumnFilter(filter, toWrite, toRead, RequiredMapFixture.class); - } - private StructV4WithExtracStructField makeStructV4WithExtracStructField(String id) { StructV4WithExtracStructField sv4 = new StructV4WithExtracStructField(); StructV3 sv3 = new StructV3(); @@ -186,107 +147,6 @@ private StructV4WithExtracStructField makeStructV4WithExtracStructField(String i return sv4; } - @Test - public void testDropMapValueStruct() throws Exception { - String filter = "reqMap/key"; - - Map mapValue = new HashMap(); - - StructV4WithExtracStructField v1 = makeStructV4WithExtracStructField("1"); - StructV4WithExtracStructField v2 = makeStructV4WithExtracStructField("2"); - - mapValue.put("key 1", v1); - mapValue.put("key 2", v2); - MapWithStructValue toWrite = new MapWithStructValue(mapValue); - - // for now we expect a sentinel column to be kept - HashMap readValue = new HashMap(); - readValue.put("key 1", new StructV4WithExtracStructField("outer name 1")); - readValue.put("key 2", new StructV4WithExtracStructField("outer name 2")); - - MapWithStructValue toRead = new MapWithStructValue(readValue); - - shouldDoProjectionWithThriftColumnFilter(filter, toWrite, toRead, MapWithStructValue.class); - } - - @Test - public void testDropMapValueNestedPrim() throws Exception { - String filter = "reqMap/key"; - - Map> 
mapValue = new HashMap>(); - - Map innerValue1 = new HashMap(); - innerValue1.put("inner key (1, 1)", "inner (1, 1)"); - innerValue1.put("inner key (1, 2)", "inner (1, 2)"); - - Map innerValue2 = new HashMap(); - innerValue2.put("inner key (2, 1)", "inner (2, 1)"); - innerValue2.put("inner key (2, 2)", "inner (2, 2)"); - - mapValue.put("outer key 1", innerValue1); - mapValue.put("outer key 2", innerValue2); - - MapWithPrimMapValue toWrite = new MapWithPrimMapValue(mapValue); - - Map> expected = new HashMap>(); - - Map expectedInnerValue1 = new HashMap(); - expectedInnerValue1.put("inner key (1, 1)", "inner (1, 1)"); - expectedInnerValue1.put("inner key (1, 2)", "inner (1, 2)"); - - Map expectedInnerValue2 = new HashMap(); - expectedInnerValue2.put("inner key (2, 1)", "inner (2, 1)"); - expectedInnerValue2.put("inner key (2, 2)", "inner (2, 2)"); - - expected.put("outer key 1", expectedInnerValue1); - expected.put("outer key 2", expectedInnerValue2); - - MapWithPrimMapValue toRead = new MapWithPrimMapValue(expected); - - shouldDoProjectionWithThriftColumnFilter(filter, toWrite, toRead, MapWithPrimMapValue.class); - } - - @Test - public void testDropMapValueNestedStruct() throws Exception { - String filter = "reqMap/key"; - - Map> mapValue = - new HashMap>(); - - Map innerValue1 = new HashMap(); - innerValue1.put("inner key (1, 1)", makeStructV4WithExtracStructField("inner (1, 1)")); - innerValue1.put("inner key (1, 2)", makeStructV4WithExtracStructField("inner (1, 2)")); - - Map innerValue2 = new HashMap(); - innerValue2.put("inner key (2, 1)", makeStructV4WithExtracStructField("inner (2, 1)")); - innerValue2.put("inner key (2, 2)", makeStructV4WithExtracStructField("inner (2, 2)")); - - mapValue.put("outer key 1", innerValue1); - mapValue.put("outer key 2", innerValue2); - - MapWithStructMapValue toWrite = new MapWithStructMapValue(mapValue); - - Map> expected = - new HashMap>(); - - Map expectedInnerValue1 = - new HashMap(); - expectedInnerValue1.put("inner key (1, 1)", new StructV4WithExtracStructField("outer name inner (1, 1)")); - expectedInnerValue1.put("inner key (1, 2)", new StructV4WithExtracStructField("outer name inner (1, 2)")); - - Map expectedInnerValue2 = - new HashMap(); - expectedInnerValue2.put("inner key (2, 1)", new StructV4WithExtracStructField("outer name inner (2, 1)")); - expectedInnerValue2.put("inner key (2, 2)", new StructV4WithExtracStructField("outer name inner (2, 2)")); - - expected.put("outer key 1", expectedInnerValue1); - expected.put("outer key 2", expectedInnerValue2); - - MapWithStructMapValue toRead = new MapWithStructMapValue(expected); - - shouldDoProjectionWithThriftColumnFilter(filter, toWrite, toRead, MapWithStructMapValue.class); - } - @Test public void testPullInRequiredLists() throws Exception { String filter = "info"; @@ -333,7 +193,7 @@ public void testPullInPrimitiveValues() throws Exception { private void shouldDoProjectionWithThriftColumnFilter( String filterDesc, TBase toWrite, TBase toRead, Class> thriftClass) throws Exception { Configuration conf = new Configuration(); - conf.set(ThriftReadSupport.THRIFT_COLUMN_FILTER_KEY, filterDesc); + conf.set(ThriftReadSupport.STRICT_THRIFT_COLUMN_FILTER_KEY, filterDesc); shouldDoProjection(conf, toWrite, toRead, thriftClass); } diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetReadProtocol.java b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetReadProtocol.java index 100c7e996b..f90471e724 100644 --- 
a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetReadProtocol.java +++ b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetReadProtocol.java @@ -41,6 +41,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.hadoop.conf.Configuration; import org.apache.parquet.column.ParquetProperties; import org.apache.parquet.column.impl.ColumnWriteStoreV1; import org.apache.parquet.column.page.mem.MemPageStore; @@ -164,7 +165,8 @@ public void testStructInMap() throws Exception { recordWriter.flush(); columns.flush(); - ThriftRecordConverter converter = new TBaseRecordConverter(thriftClass, schema, thriftType); + ThriftRecordConverter converter = + new TBaseRecordConverter(thriftClass, schema, thriftType, new Configuration()); final RecordReader recordReader = columnIO.getRecordReader(memPageStore, converter); final T result = recordReader.read(); diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetWriteProtocol.java b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetWriteProtocol.java index 9e562bf734..0c4e1d3d71 100644 --- a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetWriteProtocol.java +++ b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetWriteProtocol.java @@ -23,7 +23,6 @@ import com.twitter.data.proto.tutorial.thrift.Person; import com.twitter.data.proto.tutorial.thrift.PhoneNumber; import com.twitter.data.proto.tutorial.thrift.PhoneType; -import com.twitter.elephantbird.pig.util.ThriftToPig; import com.twitter.elephantbird.thrift.test.TestMap; import com.twitter.elephantbird.thrift.test.TestMapInList; import com.twitter.elephantbird.thrift.test.TestMapInSet; @@ -45,20 +44,14 @@ import java.util.Set; import java.util.TreeMap; import org.apache.hadoop.conf.Configuration; -import org.apache.parquet.conf.ParquetConfiguration; import org.apache.parquet.io.ColumnIOFactory; import org.apache.parquet.io.ExpectationValidatingRecordConsumer; import org.apache.parquet.io.MessageColumnIO; import org.apache.parquet.io.RecordConsumerLoggingWrapper; -import org.apache.parquet.pig.PigSchemaConverter; -import org.apache.parquet.pig.TupleWriteSupport; import org.apache.parquet.schema.MessageType; import org.apache.parquet.thrift.struct.ThriftType.StructType; -import org.apache.pig.data.Tuple; -import org.apache.pig.impl.logicalLayer.schema.Schema; import org.apache.thrift.TBase; import org.apache.thrift.TException; -import org.junit.ComparisonFailure; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -133,59 +126,19 @@ public void testMap() throws Exception { map.put("foo", "bar"); map.put("foo2", "bar2"); TestMap testMap = new TestMap("map_name", map); - try { - validatePig(expectations, testMap); - } catch (ComparisonFailure e) { - // This can happen despite using a stable TreeMap, since ThriftToPig#toPigMap - // in com.twitter.elephantbird.pig.util creates a HashMap. 
- // So we test with the map elements in reverse order - validatePig(expectationsAlt, testMap); - } validateThrift(expectations, testMap); } /** * @throws Exception - * @see TestThriftToPigCompatibility */ @Test public void testMapInSet() throws Exception { - String[] pigExpectations = { - "startMessage()", - "startField(name, 0)", - "addBinary(top)", - "endField(name, 0)", - "startField(names, 1)", // set: optional field - "startGroup()", - "startField(t, 0)", // repeated field - "startGroup()", - "startField(names_tuple, 0)", // map: optional field - "startGroup()", - "startField(key_value, 0)", // repeated field - "startGroup()", - "startField(key, 0)", // key - "addBinary(foo)", - "endField(key, 0)", - "startField(value, 1)", // value - "addBinary(bar)", - "endField(value, 1)", - "endGroup()", - "endField(key_value, 0)", - "endGroup()", - "endField(names_tuple, 0)", - "endGroup()", - "endField(t, 0)", - "endGroup()", - "endField(names, 1)", - "endMessage()" - }; - final Set> set = new HashSet>(); final Map map = new HashMap(); map.put("foo", "bar"); set.add(map); TestMapInSet o = new TestMapInSet("top", set); - validatePig(pigExpectations, o); String[] expectationsThrift = { "startMessage()", @@ -217,7 +170,6 @@ public void testMapInSet() throws Exception { /** * @throws TException - * @see TestThriftToPigCompatibility */ @Test public void testNameList() throws TException { @@ -226,31 +178,6 @@ public void testNameList() throws TException { names.add("Jack"); final TestNameList o = new TestNameList("name", names); - String[] pigExpectations = { - "startMessage()", - "startField(name, 0)", - "addBinary(name)", - "endField(name, 0)", - "startField(names, 1)", - "startGroup()", - "startField(t, 0)", - "startGroup()", - "startField(names_tuple, 0)", - "addBinary(John)", - "endField(names_tuple, 0)", - "endGroup()", - "startGroup()", - "startField(names_tuple, 0)", - "addBinary(Jack)", - "endField(names_tuple, 0)", - "endGroup()", - "endField(t, 0)", - "endGroup()", - "endField(names, 1)", - "endMessage()" - }; - validatePig(pigExpectations, o); - String[] expectations = { "startMessage()", "startField(name, 0)", @@ -326,7 +253,6 @@ public void testStructInMap() throws Exception { map.put("foo", new TestPerson(new TestName("john", "johnson"), new HashMap())); final Map stringToIntMap = Collections.singletonMap("bar", 10); TestStructInMap testMap = new TestStructInMap("map_name", map, stringToIntMap); - validatePig(expectations, testMap); validateThrift(expectations, testMap); } @@ -341,7 +267,6 @@ public void testProtocolEmptyAdressBook() throws Exception { "endMessage()" }; AddressBook a = new AddressBook(new ArrayList()); - validatePig(expectations, a); validateThrift(expectations, a); } @@ -442,7 +367,6 @@ public void testProtocolAddressBook() throws Exception { "dick@richardson.com", Arrays.asList(new PhoneNumber("555 999 9997"), new PhoneNumber("555 999 9996")))); AddressBook a = new AddressBook(persons); - validatePig(expectations, a); // naming conventions are slightly different for the bag inner tuple. The reader should ignore this. 
String[] expectationsThrift = Arrays.copyOf(expectations, expectations.length, String[].class); expectationsThrift[3] = "startField(persons_tuple, 0)"; @@ -520,7 +444,6 @@ public void testOneOfEach() throws TException { new ArrayList(), new ArrayList(), new ArrayList()); - validatePig(expectations, a); String[] thriftExpectations = Arrays.copyOf(expectations, expectations.length, String[].class); thriftExpectations[2] = "addBoolean(true)"; // Elephant bird maps booleans to int thriftExpectations[5] = "addBoolean(false)"; @@ -709,21 +632,4 @@ private void validateThrift(Configuration configuration, String[] expectations, configuration, new RecordConsumerLoggingWrapper(recordConsumer), columnIO, structType); a.write(p); } - - private MessageType validatePig(String[] expectations, TBase a) { - ThriftToPig> thriftToPig = new ThriftToPig(a.getClass()); - ExpectationValidatingRecordConsumer recordConsumer = - new ExpectationValidatingRecordConsumer(new ArrayDeque(Arrays.asList(expectations))); - Schema pigSchema = thriftToPig.toSchema(); - LOG.info("{}", pigSchema); - MessageType schema = new PigSchemaConverter().convert(pigSchema); - LOG.info("{}", schema); - TupleWriteSupport tupleWriteSupport = new TupleWriteSupport(pigSchema); - tupleWriteSupport.init((ParquetConfiguration) null); - tupleWriteSupport.prepareForWrite(recordConsumer); - final Tuple pigTuple = thriftToPig.getPigTuple(a); - LOG.info("{}", pigTuple); - tupleWriteSupport.write(pigTuple); - return schema; - } } diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftParquetReaderWriter.java b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftParquetReaderWriter.java index 57eb26dd8b..f19efd9a3d 100644 --- a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftParquetReaderWriter.java +++ b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftParquetReaderWriter.java @@ -28,7 +28,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.parquet.hadoop.metadata.CompressionCodecName; -import org.junit.Assert; import org.junit.Test; public class TestThriftParquetReaderWriter { @@ -61,20 +60,5 @@ private void readWriteTest(Boolean useThreeLevelLists) throws IOException { thriftParquetWriter.write(original); thriftParquetWriter.close(); } - - { // read - ThriftParquetReader thriftParquetReader = - new ThriftParquetReader(f, AddressBook.class); - AddressBook read = thriftParquetReader.read(); - Assert.assertEquals(original, read); - thriftParquetReader.close(); - } - - { // read without providing a thrift class - ThriftParquetReader thriftParquetReader = new ThriftParquetReader(f); - AddressBook read = thriftParquetReader.read(); - Assert.assertEquals(original, read); - thriftParquetReader.close(); - } } } diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftRecordConverter.java b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftRecordConverter.java index 46c2acd847..f7872817eb 100644 --- a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftRecordConverter.java +++ b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftRecordConverter.java @@ -26,6 +26,7 @@ import java.nio.file.Files; import java.util.ArrayList; import java.util.Arrays; +import org.apache.hadoop.conf.Configuration; import org.apache.parquet.io.ParquetDecodingException; import org.apache.parquet.io.api.Binary; import org.apache.parquet.thrift.ThriftRecordConverter.FieldEnumConverter; @@ -100,6 +101,7 @@ 
public StructWithUnionV1 readOneRecord(TProtocol protocol) throws TException { }, "name", new ThriftSchemaConverter().convert(StructWithUnionV1.class), - noStructOrUnionMeta); + noStructOrUnionMeta, + new Configuration()); } } diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConvertVisitor.java b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConvertVisitor.java deleted file mode 100644 index 47104ca2f4..0000000000 --- a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConvertVisitor.java +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.parquet.thrift; - -import static org.apache.parquet.schema.Type.Repetition; -import static org.apache.parquet.thrift.struct.ThriftField.Requirement; -import static org.junit.Assert.assertEquals; - -import java.util.ArrayList; -import java.util.List; -import org.apache.parquet.schema.LogicalTypeAnnotation; -import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit; -import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; -import org.apache.parquet.schema.Type; -import org.apache.parquet.schema.Types; -import org.apache.parquet.thrift.projection.FieldProjectionFilter; -import org.apache.parquet.thrift.struct.ThriftField; -import org.apache.parquet.thrift.struct.ThriftType; -import org.apache.parquet.thrift.struct.ThriftType.StructType; -import org.apache.parquet.thrift.struct.ThriftType.StructType.StructOrUnionType; -import org.junit.Test; - -public class TestThriftSchemaConvertVisitor { - - private MessageType buildOneFieldParquetMessage(Type expectedParquetField) { - return Types.buildMessage().addFields(expectedParquetField).named("ParquetSchema"); - } - - private StructType buildOneFieldThriftStructType(String fieldName, Short fieldId, ThriftType thriftType) { - ThriftField inputThriftField = new ThriftField(fieldName, fieldId, Requirement.REQUIRED, thriftType); - List fields = new ArrayList(1); - fields.add(inputThriftField); - return new StructType(fields, StructOrUnionType.STRUCT); - } - - @Test - public void testConvertBasicI32Type() { - String fieldName = "i32Type"; - Short fieldId = 0; - - ThriftType i32Type = new ThriftType.I32Type(); - - StructType thriftStruct = buildOneFieldThriftStructType(fieldName, fieldId, i32Type); - MessageType actual = ThriftSchemaConvertVisitor.convert(thriftStruct, FieldProjectionFilter.ALL_COLUMNS); - - Type expectedParquetField = Types.primitive(PrimitiveTypeName.INT32, Repetition.REQUIRED) - .named(fieldName) - .withId(fieldId); - MessageType expected = buildOneFieldParquetMessage(expectedParquetField); - - assertEquals(expected, actual); - } - - @Test - public void 
testConvertLogicalI32Type() { - LogicalTypeAnnotation timeLogicalType = LogicalTypeAnnotation.timeType(true, TimeUnit.MILLIS); - String fieldName = "timeI32Type"; - Short fieldId = 0; - - ThriftType timeI32Type = new ThriftType.I32Type(); - timeI32Type.setLogicalTypeAnnotation(timeLogicalType); - - StructType thriftStruct = buildOneFieldThriftStructType(fieldName, fieldId, timeI32Type); - MessageType actual = ThriftSchemaConvertVisitor.convert(thriftStruct, FieldProjectionFilter.ALL_COLUMNS); - - Type expectedParquetField = Types.primitive(PrimitiveTypeName.INT32, Repetition.REQUIRED) - .as(timeLogicalType) - .named(fieldName) - .withId(fieldId); - MessageType expected = buildOneFieldParquetMessage(expectedParquetField); - - assertEquals(expected, actual); - } - - @Test - public void testConvertBasicI64Type() { - String fieldName = "i64Type"; - Short fieldId = 0; - - ThriftType i64Type = new ThriftType.I64Type(); - - StructType thriftStruct = buildOneFieldThriftStructType(fieldName, fieldId, i64Type); - MessageType actual = ThriftSchemaConvertVisitor.convert(thriftStruct, FieldProjectionFilter.ALL_COLUMNS); - - Type expectedParquetField = Types.primitive(PrimitiveTypeName.INT64, Repetition.REQUIRED) - .named(fieldName) - .withId(fieldId); - MessageType expected = buildOneFieldParquetMessage(expectedParquetField); - - assertEquals(expected, actual); - } - - @Test - public void testConvertLogicalI64Type() { - LogicalTypeAnnotation timestampLogicalType = LogicalTypeAnnotation.timestampType(true, TimeUnit.MILLIS); - String fieldName = "logicalI64Type"; - Short fieldId = 0; - - ThriftType timestampI64Type = new ThriftType.I64Type(); - timestampI64Type.setLogicalTypeAnnotation(timestampLogicalType); - - StructType thriftStruct = buildOneFieldThriftStructType(fieldName, fieldId, timestampI64Type); - MessageType actual = ThriftSchemaConvertVisitor.convert(thriftStruct, FieldProjectionFilter.ALL_COLUMNS); - - Type expectedParquetField = Types.primitive(PrimitiveTypeName.INT64, Repetition.REQUIRED) - .as(timestampLogicalType) - .named(fieldName) - .withId(fieldId); - MessageType expected = buildOneFieldParquetMessage(expectedParquetField); - - assertEquals(expected, actual); - } - - @Test - public void testConvertStringType() { - LogicalTypeAnnotation stringLogicalType = LogicalTypeAnnotation.stringType(); - String fieldName = "stringType"; - Short fieldId = 0; - - ThriftType stringType = new ThriftType.StringType(); - - StructType thriftStruct = buildOneFieldThriftStructType(fieldName, fieldId, stringType); - MessageType actual = ThriftSchemaConvertVisitor.convert(thriftStruct, FieldProjectionFilter.ALL_COLUMNS); - - Type expectedParquetField = Types.primitive(PrimitiveTypeName.BINARY, Repetition.REQUIRED) - .as(stringLogicalType) - .named(fieldName) - .withId(fieldId); - MessageType expected = buildOneFieldParquetMessage(expectedParquetField); - - assertEquals(expected, actual); - } - - @Test - public void testConvertLogicalBinaryType() { - LogicalTypeAnnotation jsonLogicalType = LogicalTypeAnnotation.jsonType(); - String fieldName = "logicalBinaryType"; - Short fieldId = 0; - - ThriftType.StringType jsonBinaryType = new ThriftType.StringType(); - jsonBinaryType.setBinary(true); - jsonBinaryType.setLogicalTypeAnnotation(jsonLogicalType); - - StructType thriftStruct = buildOneFieldThriftStructType(fieldName, fieldId, jsonBinaryType); - MessageType actual = ThriftSchemaConvertVisitor.convert(thriftStruct, FieldProjectionFilter.ALL_COLUMNS); - - Type expectedParquetField = 
Types.primitive(PrimitiveTypeName.BINARY, Repetition.REQUIRED) - .as(jsonLogicalType) - .named(fieldName) - .withId(fieldId); - MessageType expected = buildOneFieldParquetMessage(expectedParquetField); - - assertEquals(expected, actual); - } -} diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverter.java b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverter.java index 8f9faf160f..5a930bd25f 100644 --- a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverter.java +++ b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverter.java @@ -19,20 +19,16 @@ package org.apache.parquet.thrift; import static org.apache.parquet.schema.MessageTypeParser.parseMessageType; -import static org.apache.parquet.thrift.struct.ThriftField.Requirement.REQUIRED; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; import com.twitter.data.proto.tutorial.thrift.AddressBook; import com.twitter.data.proto.tutorial.thrift.Person; import com.twitter.elephantbird.thrift.test.TestStructInMap; -import java.util.Arrays; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.MessageTypeParser; import org.apache.parquet.thrift.projection.StrictFieldProjectionFilter; import org.apache.parquet.thrift.projection.ThriftProjectionException; -import org.apache.parquet.thrift.projection.deprecated.DeprecatedFieldProjectionFilter; -import org.apache.parquet.thrift.struct.ThriftField; import org.apache.parquet.thrift.struct.ThriftType; import org.apache.parquet.thrift.struct.ThriftType.StructType; import org.apache.parquet.thrift.test.TestLogicalType; @@ -233,7 +229,7 @@ public void testProjectOnlyKeyInMap() { private void shouldThrowWhenProjectionFilterMatchesNothing( String filters, String unmatchedFilter, Class> thriftClass) { try { - getDeprecatedFilteredSchema(filters, thriftClass); + getStrictFilteredSchema(filters, thriftClass); fail("should throw projection exception when filter matches nothing"); } catch (ThriftProjectionException e) { assertEquals( @@ -245,8 +241,8 @@ private void shouldThrowWhenProjectionFilterMatchesNothing( private void shouldThrowWhenNoColumnsAreSelected(String filters, Class> thriftClass) { try { - getDeprecatedFilteredSchema(filters, thriftClass); - fail("should throw projection exception when no columns are selected"); + getStrictFilteredSchema(filters, thriftClass); + fail("this should throw"); } catch (ThriftProjectionException e) { assertEquals("No columns have been selected", e.getMessage()); } @@ -259,24 +255,25 @@ public void testThrowWhenNoColumnsAreSelected() { @Test public void testThrowWhenProjectionFilterMatchesNothing() { - shouldThrowWhenProjectionFilterMatchesNothing("name;non_existing", "non_existing", TestStructInMap.class); - shouldThrowWhenProjectionFilterMatchesNothing("**;non_existing", "non_existing", TestStructInMap.class); shouldThrowWhenProjectionFilterMatchesNothing( - "**;names/non_existing", "names/non_existing", TestStructInMap.class); + "name;non_existing", + "Pattern: 'non_existing' (when expanded to 'non_existing')", + TestStructInMap.class); + shouldThrowWhenProjectionFilterMatchesNothing( + "**;non_existing", "Pattern: 'non_existing' (when expanded to 'non_existing')", TestStructInMap.class); + shouldThrowWhenProjectionFilterMatchesNothing( + "**;names/non_existing", + "Pattern: 'names/non_existing' (when expanded to 'names/non_existing')", + TestStructInMap.class); 
shouldThrowWhenProjectionFilterMatchesNothing( - "**;names/non_existing;non_existing", "names/non_existing\nnon_existing", TestStructInMap.class); + "**;names/non_existing;non_existing", + "Pattern: 'names/non_existing' (when expanded to 'names/non_existing')\n" + + "Pattern: 'non_existing' (when expanded to 'non_existing')", + TestStructInMap.class); } @Test public void testProjectOnlyValueInMap() { - try { - getDeprecatedFilteredSchema("name;names/value/**", TestStructInMap.class); - fail("this should throw"); - } catch (ThriftProjectionException e) { - assertEquals( - "Cannot select only the values of a map, you must keep the keys as well: names", e.getMessage()); - } - try { getStrictFilteredSchema("name;names.value", TestStructInMap.class); fail("this should throw"); @@ -287,13 +284,6 @@ public void testProjectOnlyValueInMap() { } private void doTestPartialKeyProjection(String deprecated, String strict) { - try { - getDeprecatedFilteredSchema(deprecated, MapStructV2.class); - fail("this should throw"); - } catch (ThriftProjectionException e) { - assertEquals("Cannot select only a subset of the fields in a map key, for path map1", e.getMessage()); - } - try { getStrictFilteredSchema(strict, MapStructV2.class); fail("this should throw"); @@ -310,13 +300,6 @@ public void testPartialKeyProjection() { @Test public void testSetPartialProjection() { - try { - getDeprecatedFilteredSchema("set1/age", SetStructV2.class); - fail("this should throw"); - } catch (ThriftProjectionException e) { - assertEquals("Cannot select only a subset of the fields in a set, for path set1", e.getMessage()); - } - try { getStrictFilteredSchema("set1.age", SetStructV2.class); fail("this should throw"); @@ -325,40 +308,16 @@ public void testSetPartialProjection() { } } - @Test - public void testConvertStructCreatedViaDeprecatedConstructor() { - String expected = "message ParquetSchema {\n" + " required binary a (UTF8) = 1;\n" - + " required binary b (UTF8) = 2;\n" - + "}\n"; - - ThriftSchemaConverter converter = new ThriftSchemaConverter(); - - StructType structType = new StructType(Arrays.asList( - new ThriftField("a", (short) 1, REQUIRED, new ThriftType.StringType()), - new ThriftField("b", (short) 2, REQUIRED, new ThriftType.StringType()))); - - final MessageType converted = converter.convert(structType); - assertEquals(MessageTypeParser.parseMessageType(expected), converted); - } - public static void shouldGetProjectedSchema( String deprecatedFilterDesc, String strictFilterDesc, String expectedSchemaStr, Class> thriftClass) { - MessageType depRequestedSchema = getDeprecatedFilteredSchema(deprecatedFilterDesc, thriftClass); MessageType strictRequestedSchema = getStrictFilteredSchema(strictFilterDesc, thriftClass); MessageType expectedSchema = parseMessageType(expectedSchemaStr); - assertEquals(expectedSchema, depRequestedSchema); assertEquals(expectedSchema, strictRequestedSchema); } - private static MessageType getDeprecatedFilteredSchema( - String filterDesc, Class> thriftClass) { - DeprecatedFieldProjectionFilter fieldProjectionFilter = new DeprecatedFieldProjectionFilter(filterDesc); - return new ThriftSchemaConverter(fieldProjectionFilter).convert(thriftClass); - } - private static MessageType getStrictFilteredSchema( String semicolonDelimitedString, Class> thriftClass) { StrictFieldProjectionFilter fieldProjectionFilter = diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftToPigCompatibility.java 
b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftToPigCompatibility.java deleted file mode 100644 index 43cc52673d..0000000000 --- a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftToPigCompatibility.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.parquet.thrift; - -import static org.junit.Assert.assertEquals; - -import com.twitter.data.proto.tutorial.thrift.AddressBook; -import com.twitter.data.proto.tutorial.thrift.Name; -import com.twitter.data.proto.tutorial.thrift.Person; -import com.twitter.data.proto.tutorial.thrift.PhoneNumber; -import com.twitter.data.proto.tutorial.thrift.PhoneType; -import com.twitter.elephantbird.pig.util.ThriftToPig; -import com.twitter.elephantbird.thrift.test.TestMap; -import com.twitter.elephantbird.thrift.test.TestMapInSet; -import com.twitter.elephantbird.thrift.test.TestName; -import com.twitter.elephantbird.thrift.test.TestNameList; -import com.twitter.elephantbird.thrift.test.TestPerson; -import com.twitter.elephantbird.thrift.test.TestPhoneType; -import com.twitter.elephantbird.thrift.test.TestStructInMap; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import org.apache.parquet.io.ColumnIOFactory; -import org.apache.parquet.io.ConverterConsumer; -import org.apache.parquet.io.MessageColumnIO; -import org.apache.parquet.io.RecordConsumerLoggingWrapper; -import org.apache.parquet.io.api.RecordConsumer; -import org.apache.parquet.pig.PigSchemaConverter; -import org.apache.parquet.pig.convert.TupleRecordMaterializer; -import org.apache.parquet.schema.MessageType; -import org.apache.parquet.thrift.struct.ThriftType.StructType; -import org.apache.pig.data.Tuple; -import org.apache.pig.impl.logicalLayer.schema.Schema; -import org.apache.thrift.TBase; -import org.apache.thrift.TException; -import org.junit.Test; -import thrift.test.OneOfEach; - -public class TestThriftToPigCompatibility { - - public void testMap() throws Exception { - Map map = new TreeMap(); - map.put("foo", "bar"); - map.put("foo2", "bar2"); - TestMap testMap = new TestMap("map_name", map); - validateSameTupleAsEB(testMap); - } - - @Test - public void testMapInSet() throws Exception { - final Set> set = new HashSet>(); - final Map map = new HashMap(); - map.put("foo", "bar"); - set.add(map); - TestMapInSet o = new TestMapInSet("top", set); - validateSameTupleAsEB(o); - } - - @Test - public void testStructInMap() throws Exception { - - final Map map = new HashMap(); - map.put("foo", new TestPerson(new TestName("john", "johnson"), 
new HashMap())); - final Map stringToIntMap = Collections.singletonMap("bar", 10); - TestStructInMap testMap = new TestStructInMap("map_name", map, stringToIntMap); - validateSameTupleAsEB(testMap); - } - - @Test - public void testProtocolEmptyAdressBook() throws Exception { - - AddressBook a = new AddressBook(new ArrayList()); - validateSameTupleAsEB(a); - } - - @Test - public void testProtocolAddressBook() throws Exception { - ArrayList persons = new ArrayList(); - final PhoneNumber phoneNumber = new PhoneNumber("555 999 9998"); - phoneNumber.type = PhoneType.HOME; - persons.add(new Person( - new Name("Bob", "Roberts"), - 1, - "bob@roberts.com", - Arrays.asList(new PhoneNumber("555 999 9999"), phoneNumber))); - persons.add(new Person( - new Name("Dick", "Richardson"), - 2, - "dick@richardson.com", - Arrays.asList(new PhoneNumber("555 999 9997"), new PhoneNumber("555 999 9996")))); - AddressBook a = new AddressBook(persons); - validateSameTupleAsEB(a); - } - - @Test - public void testOneOfEach() throws Exception { - OneOfEach a = new OneOfEach( - true, - false, - (byte) 8, - (short) 16, - (int) 32, - (long) 64, - (double) 1234, - "string", - "å", - false, - ByteBuffer.wrap("a".getBytes()), - new ArrayList(), - new ArrayList(), - new ArrayList()); - validateSameTupleAsEB(a); - } - - @Test - public void testStringList() throws Exception { - final List names = new ArrayList(); - names.add("John"); - names.add("Jack"); - TestNameList o = new TestNameList("name", names); - validateSameTupleAsEB(o); - } - - /** - *

steps:
- *  • Writes using the thrift mapping
- *  • Reads using the pig mapping
- *  • Use Elephant bird to convert from thrift to pig
- *
  • Check that both transformations give the same result - * - * @param o the object to convert - * @throws TException - */ - public static > void validateSameTupleAsEB(T o) throws TException { - final ThriftSchemaConverter thriftSchemaConverter = new ThriftSchemaConverter(); - @SuppressWarnings("unchecked") - final Class class1 = (Class) o.getClass(); - final MessageType schema = thriftSchemaConverter.convert(class1); - - final StructType structType = ThriftSchemaConverter.toStructType(class1); - final ThriftToPig thriftToPig = new ThriftToPig(class1); - final Schema pigSchema = thriftToPig.toSchema(); - final TupleRecordMaterializer tupleRecordConverter = new TupleRecordMaterializer(schema, pigSchema, true); - RecordConsumer recordConsumer = new ConverterConsumer(tupleRecordConverter.getRootConverter(), schema); - final MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema); - ParquetWriteProtocol p = - new ParquetWriteProtocol(new RecordConsumerLoggingWrapper(recordConsumer), columnIO, structType); - o.write(p); - final Tuple t = tupleRecordConverter.getCurrentRecord(); - final Tuple expected = thriftToPig.getPigTuple(o); - assertEquals(expected.toString(), t.toString()); - final MessageType filtered = new PigSchemaConverter().filter(schema, pigSchema); - assertEquals(schema.toString(), filtered.toString()); - } -} diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/pig/TestParquetThriftStorer.java b/parquet-thrift/src/test/java/org/apache/parquet/thrift/pig/TestParquetThriftStorer.java deleted file mode 100644 index c021cd2d22..0000000000 --- a/parquet-thrift/src/test/java/org/apache/parquet/thrift/pig/TestParquetThriftStorer.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.parquet.thrift.pig; - -import static org.apache.pig.builtin.mock.Storage.tuple; -import static org.junit.Assert.assertEquals; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Properties; -import org.apache.parquet.pig.ParquetLoader; -import org.apache.parquet.thrift.test.Name; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.backend.executionengine.ExecException; -import org.apache.pig.backend.executionengine.ExecJob.JOB_STATUS; -import org.apache.pig.builtin.mock.Storage; -import org.apache.pig.builtin.mock.Storage.Data; -import org.apache.pig.data.Tuple; -import org.junit.Test; - -public class TestParquetThriftStorer { - @Test - public void testStorer() throws ExecException, Exception { - String out = "target/out"; - int rows = 1000; - Properties props = new Properties(); - props.setProperty("parquet.compression", "uncompressed"); - props.setProperty("parquet.page.size", "1000"); - PigServer pigServer = new PigServer(ExecType.LOCAL, props); - Data data = Storage.resetData(pigServer); - Collection list = new ArrayList(); - for (int i = 0; i < rows; i++) { - list.add(tuple("bob", "roberts" + i)); - } - data.set("in", "fn:chararray, ln:chararray", list); - pigServer.deleteFile(out); - pigServer.setBatchOn(); - pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();"); - pigServer.registerQuery("Store A into '" + out + "' using " + ParquetThriftStorer.class.getName() + "('" - + Name.class.getName() + "');"); - execBatch(pigServer); - - pigServer.registerQuery("B = LOAD '" + out + "' USING " + ParquetLoader.class.getName() + "();"); - pigServer.registerQuery("Store B into 'out' using mock.Storage();"); - execBatch(pigServer); - - List result = data.get("out"); - - assertEquals(rows, result.size()); - int i = 0; - for (Tuple tuple : result) { - assertEquals(tuple("bob", "roberts" + i), tuple); - ++i; - } - } - - private void execBatch(PigServer pigServer) throws IOException { - if (pigServer.executeBatch().get(0).getStatus() != JOB_STATUS.COMPLETED) { - throw new RuntimeException( - "Job failed", pigServer.executeBatch().get(0).getException()); - } - } -} diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/projection/deprecated/PathGlobPatternTest.java b/parquet-thrift/src/test/java/org/apache/parquet/thrift/projection/deprecated/PathGlobPatternTest.java deleted file mode 100644 index baf1f72d3d..0000000000 --- a/parquet-thrift/src/test/java/org/apache/parquet/thrift/projection/deprecated/PathGlobPatternTest.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.parquet.thrift.projection.deprecated; - -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import org.junit.Test; - -/** - * Test using glob syntax to specify which attribute to retain - */ -public class PathGlobPatternTest { - @Test - public void testRecursiveGlob() { - PathGlobPattern g = new PathGlobPattern("a/**/b"); - assertFalse(g.matches("a/b")); - assertTrue(g.matches("a/asd/b")); - assertTrue(g.matches("a/asd/ss/b")); - - g = new PathGlobPattern("a/**"); - assertTrue(g.matches("a/as")); - assertTrue(g.matches("a/asd/b")); - assertTrue(g.matches("a/asd/ss/b")); - } - - @Test - public void testStandardGlob() { - PathGlobPattern g = new PathGlobPattern("a/*"); - assertTrue(g.matches("a/as")); - assertFalse(g.matches("a/asd/b")); - assertFalse(g.matches("a/asd/ss/b")); - - g = new PathGlobPattern("a/{bb,cc}/d"); - assertTrue(g.matches("a/bb/d")); - assertTrue(g.matches("a/cc/d")); - assertFalse(g.matches("a/cc/bb/d")); - assertFalse(g.matches("a/d")); - } -}