Skip to content

Commit ef35bf5

Browse files
committed
DRILL-7223: Create an option to control timeout for REFRESH METADATA
1 parent 887dee2 commit ef35bf5

File tree

7 files changed

+51
-18
lines changed

7 files changed

+51
-18
lines changed

exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java

+4
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,10 @@ private ExecConstants() {
354354
"enables statistics usage for varchar and decimal data types. Default is unset, i.e. empty string. " +
355355
"Allowed values: 'true', 'false', '' (empty string)."), "true", "false", "");
356356

357+
public static final String PARQUET_REFRESH_TIMEOUT = "store.parquet.refresh_timeout_per_runnable_in_msec";
358+
public static final LongValidator PARQUET_REFRESH_TIMEOUT_VALIDATOR = new LongValidator(PARQUET_REFRESH_TIMEOUT,
359+
new OptionDescription("Sets a timeout (in msec) for REFRESH TABLE METADATA processing of a single subdirectory"));
360+
357361
public static final String PARQUET_PAGEREADER_ASYNC = "store.parquet.reader.pagereader.async";
358362
public static final OptionValidator PARQUET_PAGEREADER_ASYNC_VALIDATOR = new BooleanValidator(PARQUET_PAGEREADER_ASYNC,
359363
new OptionDescription("Enable the asynchronous page reader. This pipelines the reading of data from disk for high performance."));

exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java

+1
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ public static CaseInsensitiveMap<OptionDefinition> createDefaultOptionDefinition
174174
new OptionDefinition(ExecConstants.PARQUET_PAGEREADER_USE_FADVISE_VALIDATOR),
175175
new OptionDefinition(ExecConstants.PARQUET_READER_INT96_AS_TIMESTAMP_VALIDATOR),
176176
new OptionDefinition(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX_VALIDATOR),
177+
new OptionDefinition(ExecConstants.PARQUET_REFRESH_TIMEOUT_VALIDATOR),
177178
new OptionDefinition(ExecConstants.PARQUET_FLAT_READER_BULK_VALIDATOR),
178179
new OptionDefinition(ExecConstants.PARQUET_FLAT_BATCH_NUM_RECORDS_VALIDATOR, new OptionMetaData(OptionValue.AccessibleScopes.SYSTEM_AND_SESSION, true, true)),
179180
new OptionDefinition(ExecConstants.PARQUET_FLAT_BATCH_MEMORY_SIZE_VALIDATOR, new OptionMetaData(OptionValue.AccessibleScopes.SYSTEM_AND_SESSION, true, true)),

exec/java-exec/src/main/java/org/apache/drill/exec/store/TimedCallable.java

+23-7
Original file line numberDiff line numberDiff line change
@@ -166,14 +166,13 @@ public final V call() throws Exception {
166166
throw e;
167167
} finally {
168168
long time = System.nanoTime() - start;
169-
if (logger.isWarnEnabled()) {
170-
long timeMillis = TimeUnit.MILLISECONDS.convert(time, TimeUnit.NANOSECONDS);
171-
if (timeMillis > TIMEOUT_PER_RUNNABLE_IN_MSECS) {
172-
logger.warn("Task '{}' execution time {} ms exceeds timeout {} ms.", this, timeMillis, TIMEOUT_PER_RUNNABLE_IN_MSECS);
173-
} else {
174-
logger.debug("Task '{}' execution time is {} ms", this, timeMillis);
175-
}
169+
long timeMillis = TimeUnit.MILLISECONDS.convert(time, TimeUnit.NANOSECONDS);
170+
if (timeMillis > TIMEOUT_PER_RUNNABLE_IN_MSECS) {
171+
logger.warn("Task '{}' execution time {} ms exceeds timeout {} ms.", this, timeMillis, TIMEOUT_PER_RUNNABLE_IN_MSECS);
172+
} else {
173+
logger.debug("Task '{}' execution time is {} ms", this, timeMillis);
176174
}
175+
177176
executionTime = time;
178177
}
179178
}
@@ -188,6 +187,23 @@ private long getExecutionTime(TimeUnit unit) {
188187
return unit.convert(executionTime, TimeUnit.NANOSECONDS);
189188
}
190189

190+
/**
191+
* Execute the list of runnables with the given parallelization. At end, return values and report completion time
192+
* stats to provided logger. Each runnable is allowed a certain timeout. If the timeout is exceeded, existing/pending
193+
* tasks will be cancelled and a {@link UserException} is thrown.
194+
* @param activity Name of activity for reporting in logger.
195+
* @param logger The logger to use to report results.
196+
* @param tasks List of callables that should be executed and timed. If this list has one item, the task will be
197+
* completed in-thread. Each callable must handle {@link InterruptedException}s.
198+
* @param parallelism The number of threads that should be run to complete this task.
199+
* @param timeout if bigger than zero, set the timeout per runnable (in msec)
200+
* @return The list of outcome objects.
201+
* @throws IOException All exceptions are coerced to IOException since this was built for storage system tasks initially.
202+
*/
203+
public static <V> List<V> run(final String activity, final Logger logger, final List<TimedCallable<V>> tasks, int parallelism, long timeout) throws IOException {
204+
TIMEOUT_PER_RUNNABLE_IN_MSECS = timeout;
205+
return run(activity, logger, tasks, parallelism);
206+
}
191207

192208
/**
193209
* Execute the list of runnables with the given parallelization. At end, return values and report completion time

exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderConfig.java

+18-6
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ public class ParquetReaderConfig {
5050
private boolean enableTimeReadCounter = false;
5151
private boolean autoCorrectCorruptedDates = true;
5252
private boolean enableStringsSignedMinMax = false;
53+
private long timeoutPerRunnableInMsec = 15_000;
5354

5455
public static ParquetReaderConfig.Builder builder() {
5556
return new ParquetReaderConfig.Builder();
@@ -64,12 +65,15 @@ public ParquetReaderConfig(@JsonProperty("enableBytesReadCounter") Boolean enabl
6465
@JsonProperty("enableBytesTotalCounter") Boolean enableBytesTotalCounter,
6566
@JsonProperty("enableTimeReadCounter") Boolean enableTimeReadCounter,
6667
@JsonProperty("autoCorrectCorruptedDates") Boolean autoCorrectCorruptedDates,
67-
@JsonProperty("enableStringsSignedMinMax") Boolean enableStringsSignedMinMax) {
68+
@JsonProperty("enableStringsSignedMinMax") Boolean enableStringsSignedMinMax,
69+
@JsonProperty("timeoutPerRunnableInMsec") Long timeoutPerRunnableInMsec) {
6870
this.enableBytesReadCounter = enableBytesReadCounter == null ? this.enableBytesReadCounter : enableBytesReadCounter;
6971
this.enableBytesTotalCounter = enableBytesTotalCounter == null ? this.enableBytesTotalCounter : enableBytesTotalCounter;
7072
this.enableTimeReadCounter = enableTimeReadCounter == null ? this.enableTimeReadCounter : enableTimeReadCounter;
7173
this.autoCorrectCorruptedDates = autoCorrectCorruptedDates == null ? this.autoCorrectCorruptedDates : autoCorrectCorruptedDates;
7274
this.enableStringsSignedMinMax = enableStringsSignedMinMax == null ? this.enableStringsSignedMinMax : enableStringsSignedMinMax;
75+
this.timeoutPerRunnableInMsec = timeoutPerRunnableInMsec == null || Long.valueOf(timeoutPerRunnableInMsec) <= 0 ? // zero means: use default
76+
this.timeoutPerRunnableInMsec : timeoutPerRunnableInMsec;
7377
}
7478

7579
private ParquetReaderConfig() { }
@@ -99,6 +103,9 @@ public boolean enableStringsSignedMinMax() {
99103
return enableStringsSignedMinMax;
100104
}
101105

106+
@JsonProperty("timeoutPerRunnableInMsec")
107+
public long timeoutPerRunnableInMsec() { return timeoutPerRunnableInMsec; }
108+
102109
public ParquetReadOptions toReadOptions() {
103110
return ParquetReadOptions.builder()
104111
.useSignedStringMinMax(enableStringsSignedMinMax)
@@ -119,7 +126,8 @@ public int hashCode() {
119126
enableBytesTotalCounter,
120127
enableTimeReadCounter,
121128
autoCorrectCorruptedDates,
122-
enableStringsSignedMinMax);
129+
enableStringsSignedMinMax,
130+
timeoutPerRunnableInMsec);
123131
}
124132

125133
@Override
@@ -135,7 +143,8 @@ public boolean equals(Object o) {
135143
&& enableBytesTotalCounter == that.enableBytesTotalCounter
136144
&& enableTimeReadCounter == that.enableTimeReadCounter
137145
&& autoCorrectCorruptedDates == that.autoCorrectCorruptedDates
138-
&& enableStringsSignedMinMax == that.enableStringsSignedMinMax;
146+
&& enableStringsSignedMinMax == that.enableStringsSignedMinMax
147+
&& timeoutPerRunnableInMsec == that.timeoutPerRunnableInMsec;
139148
}
140149

141150
@Override
@@ -146,6 +155,7 @@ public String toString() {
146155
+ ", enableTimeReadCounter=" + enableTimeReadCounter
147156
+ ", autoCorrectCorruptedDates=" + autoCorrectCorruptedDates
148157
+ ", enableStringsSignedMinMax=" + enableStringsSignedMinMax
158+
+ ", timeoutPerRunnableInMsec=" + timeoutPerRunnableInMsec
149159
+ '}';
150160
}
151161

@@ -188,10 +198,12 @@ public ParquetReaderConfig build() {
188198

189199
// last assign values from session options, session options have higher priority than other configurations
190200
if (options != null) {
191-
String option = options.getOption(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX_VALIDATOR);
192-
if (!option.isEmpty()) {
193-
readerConfig.enableStringsSignedMinMax = Boolean.valueOf(option);
201+
String optionSignedMinMax = options.getOption(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX_VALIDATOR);
202+
if (!optionSignedMinMax.isEmpty()) {
203+
readerConfig.enableStringsSignedMinMax = Boolean.valueOf(optionSignedMinMax);
194204
}
205+
Long optionTimeout = options.getOption(ExecConstants.PARQUET_REFRESH_TIMEOUT_VALIDATOR);
206+
readerConfig.timeoutPerRunnableInMsec = Long.valueOf(optionTimeout);
195207
}
196208

197209
return readerConfig;

exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata.java

+1-2
Original file line numberDiff line numberDiff line change
@@ -444,8 +444,7 @@ private ParquetTableMetadata_v4 getParquetTableMetadata(Map<FileStatus, FileSyst
444444
private List<ParquetFileAndRowCountMetadata> getParquetFileMetadata_v4(ParquetTableMetadata_v4 parquetTableMetadata_v4, Map<FileStatus, FileSystem> fileStatusMap, boolean allColumnsInteresting, Set<String> columnSet) throws IOException {
445445
return TimedCallable.run("Fetch parquet metadata", logger,
446446
Collectors.toList(fileStatusMap,
447-
(fileStatus, fileSystem) -> new MetadataGatherer(parquetTableMetadata_v4, fileStatus, fileSystem, allColumnsInteresting, columnSet)),
448-
16
447+
(fileStatus, fileSystem) -> new MetadataGatherer(parquetTableMetadata_v4, fileStatus, fileSystem, allColumnsInteresting, columnSet)), 16, readerConfig.timeoutPerRunnableInMsec()
449448
);
450449
}
451450

exec/java-exec/src/main/resources/drill-module.conf

+1
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,7 @@ drill.exec.options: {
629629
store.parquet.reader.columnreader.async: false,
630630
store.parquet.reader.int96_as_timestamp: false,
631631
store.parquet.reader.strings_signed_min_max: "",
632+
store.parquet.refresh_timeout_per_runnable_in_msec: 15000,
632633
store.parquet.reader.pagereader.async: true,
633634
store.parquet.reader.pagereader.bufferedread: true,
634635
store.parquet.reader.pagereader.buffersize: 1048576,

exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetReaderConfig.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public void testDefaultsDeserialization() throws Exception {
4545

4646
// change the default: set autoCorrectCorruptedDates to false
4747
// keep the default: set enableStringsSignedMinMax to false
48-
readerConfig = new ParquetReaderConfig(false, false, false, false, false);
48+
readerConfig = new ParquetReaderConfig(false, false, false, false, false, 0L);
4949

5050
value = mapper.writeValueAsString(readerConfig);
5151
assertEquals("{\"autoCorrectCorruptedDates\":false}", value);
@@ -79,12 +79,12 @@ public void testAddConfigToConf() {
7979
@Test
8080
public void testReadOptions() {
8181
// set enableStringsSignedMinMax to true
82-
ParquetReaderConfig readerConfig = new ParquetReaderConfig(false, false, false, true, true);
82+
ParquetReaderConfig readerConfig = new ParquetReaderConfig(false, false, false, true, true, 12345L);
8383
ParquetReadOptions readOptions = readerConfig.toReadOptions();
8484
assertTrue(readOptions.useSignedStringMinMax());
8585

8686
// set enableStringsSignedMinMax to false
87-
readerConfig = new ParquetReaderConfig(false, false, false, true, false);
87+
readerConfig = new ParquetReaderConfig(false, false, false, true, false, 12345L);
8888
readOptions = readerConfig.toReadOptions();
8989
assertFalse(readOptions.useSignedStringMinMax());
9090
}

0 commit comments

Comments
 (0)