38 changes: 0 additions & 38 deletions parquet-thrift/pom.xml
@@ -120,29 +120,6 @@
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-pig</artifactId>
<version>1.15.0</version>
</dependency>
<dependency>
<groupId>org.apache.pig</groupId>
<artifactId>pig</artifactId>
<version>${pig.version}</version>
<classifier>${pig.classifier}</classifier>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>javax.annotation</groupId>
<artifactId>javax.annotation-api</artifactId>
<version>${javax.annotation.version}</version>
</dependency>
<dependency> <!-- for pig runtime in tests -->
<groupId>org.antlr</groupId>
<artifactId>antlr-runtime</artifactId>
<version>3.5.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
@@ -160,28 +137,13 @@
<version>${slf4j.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<!-- needed for Pig tests -->
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-hadoop</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<!-- Needed to compile PathGlobPattern on Hadoop 3.
If that deprecated class is removed, so can this dependency -->
<groupId>com.google.re2j</groupId>
<artifactId>re2j</artifactId>
<version>1.7</version>
<scope>provided</scope>
</dependency>
</dependencies>

<dependencyManagement>
@@ -15,7 +15,6 @@
*/
package org.apache.parquet.hadoop.thrift;

import com.twitter.elephantbird.pig.util.ThriftToPig;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.conf.HadoopParquetConfiguration;
@@ -25,13 +24,11 @@
import org.apache.parquet.io.ColumnIOFactory;
import org.apache.parquet.io.MessageColumnIO;
import org.apache.parquet.io.api.RecordConsumer;
import org.apache.parquet.pig.PigMetaData;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.thrift.ParquetWriteProtocol;
import org.apache.parquet.thrift.ThriftMetaData;
import org.apache.parquet.thrift.ThriftSchemaConverter;
import org.apache.parquet.thrift.struct.ThriftType.StructType;
import org.apache.thrift.TBase;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -99,26 +96,10 @@ protected void init(Class<T> thriftClass) {

final Map<String, String> extraMetaData =
new ThriftMetaData(thriftClass.getName(), thriftStruct).toExtraMetaData();
// adding the Pig schema as it would have been mapped from thrift
// TODO: make this work for non-tbase types
if (isPigLoaded() && TBase.class.isAssignableFrom(thriftClass)) {
new PigMetaData(new ThriftToPig((Class<? extends TBase<?, ?>>) thriftClass).toSchema())
.addToMetaData(extraMetaData);
}

this.writeContext = new WriteContext(schema, extraMetaData);
}

protected boolean isPigLoaded() {
try {
Class.forName("org.apache.pig.impl.logicalLayer.schema.Schema");
return true;
} catch (ClassNotFoundException e) {
LOG.info("Pig is not loaded, pig metadata will not be written");
return false;
}
}

@Override
public WriteContext init(Configuration configuration) {
return init(new HadoopParquetConfiguration(configuration));
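Together with the pom.xml change above, removing isPigLoaded() and the PigMetaData branch means the Pig schema is no longer written into the file footer's extra metadata. A downstream job that still wants that footer entry can add it itself. A minimal sketch, assuming parquet-pig and elephant-bird are re-added as explicit dependencies; the ThriftToPig and PigMetaData calls mirror the removed code, while the class name, method name, and MyThriftRecord-style type are hypothetical:

```java
import java.util.HashMap;
import java.util.Map;

import com.twitter.elephantbird.pig.util.ThriftToPig;
import org.apache.parquet.pig.PigMetaData;
import org.apache.thrift.TBase;

public class PigFooterMetadata {
    // Rebuild the footer entry the removed branch used to write: map the thrift
    // class to a Pig schema, then serialize that schema into the metadata map.
    public static <T extends TBase<?, ?>> Map<String, String> pigSchemaEntry(Class<T> thriftClass) {
        Map<String, String> extraMetaData = new HashMap<>();
        new PigMetaData(new ThriftToPig<>(thriftClass).toSchema()).addToMetaData(extraMetaData);
        return extraMetaData;
    }
}
```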
ParquetThriftBytesOutputFormat.java
@@ -72,23 +72,4 @@ public ParquetThriftBytesOutputFormat(
FieldIgnoredHandler errorHandler) {
super(new ThriftBytesWriteSupport(configuration, protocolFactory, thriftClass, buffered, errorHandler));
}

/**
 * @param protocolFactory the protocol factory to use to read the bytes
 * @param thriftClass the class to extract the schema from
 * @param buffered whether we should buffer each record
 * @param errorHandler handles record corruption and schema-incompatible exceptions
 * @deprecated Use {@link #ParquetThriftBytesOutputFormat(Configuration, TProtocolFactory, Class, boolean, FieldIgnoredHandler)} instead.
 */
@Deprecated
public ParquetThriftBytesOutputFormat(
TProtocolFactory protocolFactory,
Class<? extends TBase<?, ?>> thriftClass,
boolean buffered,
FieldIgnoredHandler errorHandler) {
this(new Configuration(), protocolFactory, thriftClass, buffered, errorHandler);
}
}
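Callers of the removed constructor migrate by passing the job's Configuration explicitly instead of letting the output format create one internally. A sketch against the surviving five-argument constructor; the protocol factory choice and MyThriftRecord are hypothetical:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.thrift.protocol.TCompactProtocol;

public class OutputFormatMigration {
    public static ParquetThriftBytesOutputFormat create(Configuration conf) {
        return new ParquetThriftBytesOutputFormat(
                conf,                           // was created as `new Configuration()` by the deprecated ctor
                new TCompactProtocol.Factory(), // any TProtocolFactory for reading the bytes
                MyThriftRecord.class,           // hypothetical thrift-generated class
                true,                           // buffer each record
                null);                          // no FieldIgnoredHandler; the no-arg path also defaults to null
    }
}
```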
ParquetThriftOutputFormat.java
@@ -29,14 +29,14 @@
public class ParquetThriftOutputFormat<T extends TBase<?, ?>> extends ParquetOutputFormat<T> {

public static void setThriftClass(Job job, Class<? extends TBase<?, ?>> thriftClass) {
ThriftWriteSupport.setThriftClass(ContextUtil.getConfiguration(job), thriftClass);
TBaseWriteSupport.setThriftClass(ContextUtil.getConfiguration(job), thriftClass);
}

public static Class<? extends TBase<?, ?>> getThriftClass(Job job) {
return ThriftWriteSupport.getThriftClass(ContextUtil.getConfiguration(job));
return TBaseWriteSupport.getThriftClass(ContextUtil.getConfiguration(job));
}

public ParquetThriftOutputFormat() {
super(new ThriftWriteSupport<T>());
super(new TBaseWriteSupport<T>());
}
}
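The ThriftWriteSupport to TBaseWriteSupport swap is internal to the output format, so existing driver code keeps calling the same static helpers. A hedged sketch of typical job wiring; MyThriftRecord and the output path argument are hypothetical:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WriteThriftParquetDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "write-thrift-parquet");
        job.setOutputFormatClass(ParquetThriftOutputFormat.class);
        // Unchanged caller-facing API; it now stores the class for TBaseWriteSupport.
        ParquetThriftOutputFormat.setThriftClass(job, MyThriftRecord.class);
        FileOutputFormat.setOutputPath(job, new Path(args[0]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
```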
ThriftBytesWriteSupport.java
@@ -99,20 +99,6 @@ public ThriftBytesWriteSupport() {
this.errorHandler = null;
}

/**
 * @deprecated Use {@link #ThriftBytesWriteSupport(Configuration, TProtocolFactory, Class, boolean, FieldIgnoredHandler)} instead
 */
@Deprecated
public ThriftBytesWriteSupport(
TProtocolFactory protocolFactory,
Class<? extends TBase<?, ?>> thriftClass,
boolean buffered,
FieldIgnoredHandler errorHandler) {
this(new Configuration(), protocolFactory, thriftClass, buffered, errorHandler);
}

public ThriftBytesWriteSupport(
Configuration configuration,
TProtocolFactory protocolFactory,
ThriftReadSupport.java
@@ -23,7 +23,6 @@
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
import org.apache.parquet.Strings;
import org.apache.parquet.conf.HadoopParquetConfiguration;
import org.apache.parquet.conf.ParquetConfiguration;
@@ -39,7 +38,6 @@
import org.apache.parquet.thrift.projection.FieldProjectionFilter;
import org.apache.parquet.thrift.projection.StrictFieldProjectionFilter;
import org.apache.parquet.thrift.projection.ThriftProjectionException;
import org.apache.parquet.thrift.projection.deprecated.DeprecatedFieldProjectionFilter;
import org.apache.parquet.thrift.struct.ThriftType.StructType;
import org.apache.thrift.TBase;
import org.apache.thrift.protocol.TProtocol;
@@ -49,13 +47,6 @@
public class ThriftReadSupport<T> extends ReadSupport<T> {
private static final Logger LOG = LoggerFactory.getLogger(ThriftReadSupport.class);

/**
* Deprecated. Use {@link #STRICT_THRIFT_COLUMN_FILTER_KEY}
* Accepts a ";" delimited list of globs in the syntax implemented by {@link DeprecatedFieldProjectionFilter}
*/
@Deprecated
public static final String THRIFT_COLUMN_FILTER_KEY = "parquet.thrift.column.filter";

/**
* Accepts a ";" delimited list of glob paths, in the syntax implemented by {@link StrictFieldProjectionFilter}
*/
@@ -73,20 +64,6 @@ public class ThriftReadSupport<T> extends ReadSupport<T> {

protected Class<T> thriftClass;

/**
* A {@link ThriftRecordConverter} builds an object by working with {@link TProtocol}. The default
* implementation creates standard Apache Thrift {@link TBase} objects; to support alternatives, such
* as <a href="http://github.com/twitter/scrooge">Twitter's Scrooge</a>, a custom converter can be specified.
*
* @param conf a mapred jobconf
* @param klass a thrift class
* @deprecated use {@link #setRecordConverterClass(Configuration, Class)} below
*/
@Deprecated
public static void setRecordConverterClass(JobConf conf, Class<?> klass) {
setRecordConverterClass((Configuration) conf, klass);
}

/**
* A {@link ThriftRecordConverter} builds an object by working with {@link TProtocol}. The default
* implementation creates standard Apache Thrift {@link TBase} objects; to support alternatives, such
@@ -99,11 +76,6 @@ public static void setRecordConverterClass(Configuration conf, Class<?> klass) {
conf.set(RECORD_CONVERTER_CLASS_KEY, klass.getName());
}

@Deprecated
public static void setProjectionPushdown(JobConf jobConf, String projectionString) {
jobConf.set(THRIFT_COLUMN_FILTER_KEY, projectionString);
}

public static void setStrictFieldProjectionFilter(Configuration conf, String semicolonDelimitedGlobs) {
conf.set(STRICT_THRIFT_COLUMN_FILTER_KEY, semicolonDelimitedGlobs);
}
@@ -113,31 +85,12 @@ public static FieldProjectionFilter getFieldProjectionFilter(Configuration conf)
}

public static FieldProjectionFilter getFieldProjectionFilter(ParquetConfiguration conf) {
String deprecated = conf.get(THRIFT_COLUMN_FILTER_KEY);
String strict = conf.get(STRICT_THRIFT_COLUMN_FILTER_KEY);

if (Strings.isNullOrEmpty(deprecated) && Strings.isNullOrEmpty(strict)) {
if (Strings.isNullOrEmpty(strict)) {
return null;
}

if (!Strings.isNullOrEmpty(deprecated) && !Strings.isNullOrEmpty(strict)) {
throw new ThriftProjectionException("You cannot provide both "
+ THRIFT_COLUMN_FILTER_KEY
+ " and "
+ STRICT_THRIFT_COLUMN_FILTER_KEY
+ "! "
+ THRIFT_COLUMN_FILTER_KEY
+ " is deprecated.");
}

if (!Strings.isNullOrEmpty(deprecated)) {
LOG.warn(
"Using {} is deprecated. Please see the docs for {}!",
THRIFT_COLUMN_FILTER_KEY,
STRICT_THRIFT_COLUMN_FILTER_KEY);
return new DeprecatedFieldProjectionFilter(deprecated);
}

return StrictFieldProjectionFilter.fromSemicolonDelimitedString(strict);
}

@@ -166,8 +119,8 @@ public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(InitContext co
if (partialSchemaString != null && projectionFilter != null) {
throw new ThriftProjectionException(String.format(
"You cannot provide both a partial schema and field projection filter."
+ "Only one of (%s, %s, %s) should be set.",
PARQUET_READ_SCHEMA, STRICT_THRIFT_COLUMN_FILTER_KEY, THRIFT_COLUMN_FILTER_KEY));
+ "Only one of (%s, %s) should be set.",
PARQUET_READ_SCHEMA, STRICT_THRIFT_COLUMN_FILTER_KEY));
}

// set requestedProjections only when it's specified
@@ -197,13 +150,6 @@ protected MessageType getProjectedSchema(
.convert((Class<TBase<?, ?>>) thriftClass);
}

@Deprecated
@SuppressWarnings("unchecked")
protected MessageType getProjectedSchema(FieldProjectionFilter fieldProjectionFilter) {
return new ThriftSchemaConverter(new Configuration(), fieldProjectionFilter)
.convert((Class<TBase<?, ?>>) thriftClass);
}

private void initThriftClassFromMultipleFiles(Map<String, Set<String>> fileMetadata, Configuration conf)
throws ClassNotFoundException {
initThriftClassFromMultipleFiles(fileMetadata, new HadoopParquetConfiguration(conf));
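Reader-side migration: with parquet.thrift.column.filter and DeprecatedFieldProjectionFilter gone, projection is configured only through the strict key. A sketch; the projected field names are hypothetical, and plain top-level names are the simplest valid globs (see StrictFieldProjectionFilter for the full syntax):

```java
import org.apache.hadoop.conf.Configuration;

public class ProjectionMigration {
    public static Configuration withProjection() {
        Configuration conf = new Configuration();
        // ";"-delimited globs in StrictFieldProjectionFilter syntax; this replaces
        // the removed parquet.thrift.column.filter property.
        ThriftReadSupport.setStrictFieldProjectionFilter(conf, "id;name");
        return conf;
    }
}
```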

This file was deleted.

TBaseRecordConverter.java
@@ -30,19 +30,6 @@

public class TBaseRecordConverter<T extends TBase<?, ?>> extends ThriftRecordConverter<T> {

/**
* This is for compatibility only.
*
* @param thriftClass a thrift class
* @param requestedParquetSchema the requested Parquet schema
* @param thriftType the thrift type
* @deprecated will be removed in 2.x
*/
@Deprecated
public TBaseRecordConverter(final Class<T> thriftClass, MessageType requestedParquetSchema, StructType thriftType) {
this(thriftClass, requestedParquetSchema, thriftType, (HadoopParquetConfiguration) null);
}

@SuppressWarnings("unused")
public TBaseRecordConverter(
final Class<T> thriftClass, MessageType requestedParquetSchema, StructType thriftType, Configuration conf) {
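Callers of the removed compatibility constructor now supply the configuration themselves through the four-argument constructor kept above. A sketch; MyThriftRecord and the schema arguments are hypothetical placeholders:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.thrift.struct.ThriftType.StructType;

public class ConverterMigration {
    public static TBaseRecordConverter<MyThriftRecord> create(
            MessageType requestedParquetSchema, StructType thriftType) {
        // The removed constructor delegated with a null configuration; passing a
        // real Configuration (or ParquetConfiguration) is now explicit.
        return new TBaseRecordConverter<>(
                MyThriftRecord.class, requestedParquetSchema, thriftType, new Configuration());
    }
}
```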