Skip to content

Commit

Permalink
Error management for Wrangler Transform plugin
Browse files Browse the repository at this point in the history
  • Loading branch information
Amit-CloudSufi committed Jan 29, 2025
1 parent 0f6e1b4 commit 1613e69
Show file tree
Hide file tree
Showing 3 changed files with 178 additions and 40 deletions.
43 changes: 29 additions & 14 deletions wrangler-transform/src/main/java/io/cdap/wrangler/Precondition.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,12 @@

package io.cdap.wrangler;

import io.cdap.cdap.api.exception.ErrorType;
import io.cdap.cdap.api.exception.ProgramFailureException;
import io.cdap.wrangler.api.Row;
import org.apache.commons.jexl3.scripting.JexlScriptEngine;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.script.Bindings;
import javax.script.CompiledScript;
Expand All @@ -30,6 +34,8 @@
* A precondition expression that filters data into the directives.
*/
public class Precondition {
private static final Logger LOG = LoggerFactory.getLogger(Precondition.class);

private final String condition;
private final CompiledScript script;
// SimpleScriptContext is pretty expensive to construct due to all PrintWriter creation, so let's cache it
Expand All @@ -41,11 +47,7 @@ public Precondition(String condition) throws PreconditionException {
try {
script = engine.compile(condition);
} catch (ScriptException e) {
if (e.getCause() != null) {
throw new PreconditionException(e.getCause().getMessage());
} else {
throw new PreconditionException(e.getMessage());
}
throw getProgramFailureExceptionDueToPrecondition(condition, e);
}
}

Expand All @@ -61,7 +63,7 @@ public ScriptContext createContext() {
return context;
}

public boolean apply(Row row) throws PreconditionException {
public boolean apply(Row row) {
Bindings bindings = new SimpleBindings();
for (int i = 0; i < row.width(); ++i) {
bindings.put(row.getColumn(i), row.getValue(i));
Expand All @@ -72,20 +74,33 @@ public boolean apply(Row row) throws PreconditionException {
scriptContext.setBindings(bindings, ScriptContext.ENGINE_SCOPE);
Object result = script.eval(scriptContext);
if (!(result instanceof Boolean)) {
throw new PreconditionException(
String.format("Precondition '%s' does not result in true or false.", condition)
);
String errorMessage = String.format("Precondition '%s' does not result in true or false.",
condition);
throw WranglerUtil.getProgramFailureExceptionDetailsFromChain(null, errorMessage,
ErrorType.USER, false);
}
return (Boolean) result;
} catch (ScriptException e) {
// Generally JexlException wraps the original exception, so it's good idea
// to check if there is a inner exception, if there is wrap it in 'DirectiveExecutionException'
// else just print the error message.
if (e.getCause() != null) {
throw new PreconditionException(e.getCause().getMessage());
} else {
throw new PreconditionException(e.getMessage());
}
throw getProgramFailureExceptionDueToPrecondition(condition, e);
}
}

private static ProgramFailureException getProgramFailureExceptionDueToPrecondition(
String condition, ScriptException e) {
String errorMessage;
if (e.getCause() != null) {
LOG.error("Error in evaluating precondition '{}'. {}: {}", condition, e.getClass().getName(),
e.getCause().getMessage());
errorMessage = e.getCause().getMessage();
} else {
LOG.error("Error in evaluating precondition '{}'. {}: {}", condition, e.getClass().getName(),
e.getMessage());
errorMessage = e.getMessage();
}
return WranglerUtil.getProgramFailureExceptionDetailsFromChain(
new PreconditionException(errorMessage), null, ErrorType.SYSTEM, false);
}
}
100 changes: 74 additions & 26 deletions wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.cdap.api.data.format.StructuredRecord;
import io.cdap.cdap.api.data.schema.Schema;
import io.cdap.cdap.api.exception.ErrorType;
import io.cdap.cdap.api.metrics.Metrics;
import io.cdap.cdap.api.plugin.PluginConfig;
import io.cdap.cdap.api.plugin.PluginProperties;
Expand Down Expand Up @@ -53,6 +54,7 @@
import io.cdap.wrangler.api.EntityCountMetric;
import io.cdap.wrangler.api.ErrorRecord;
import io.cdap.wrangler.api.ExecutorContext;
import io.cdap.wrangler.api.RecipeException;
import io.cdap.wrangler.api.RecipeParser;
import io.cdap.wrangler.api.RecipePipeline;
import io.cdap.wrangler.api.RecipeSymbol;
Expand Down Expand Up @@ -243,9 +245,13 @@ public void configurePipeline(PipelineConfigurer configurer) {
}
}
} catch (CompileException e) {
collector.addFailure("Compilation error occurred : " + e.getMessage(), null);
collector.addFailure(
String.format("Compilation error occurred, %s: %s ", e.getClass().getName(),
e.getMessage()), null);
} catch (DirectiveParseException e) {
collector.addFailure(e.getMessage(), null);
collector.addFailure(
String.format("Error parsing directive, %s: %s", e.getClass().getName(),
e.getMessage()), null);
}

// Based on the configuration create output schema.
Expand All @@ -254,8 +260,9 @@ public void configurePipeline(PipelineConfigurer configurer) {
oSchema = Schema.parseJson(config.schema);
}
} catch (IOException e) {
collector.addFailure("Invalid output schema.", null)
.withConfigProperty(Config.NAME_SCHEMA).withStacktrace(e.getStackTrace());
collector.addFailure(
String.format("Invalid output schema %s: %s", e.getClass().getName(), e.getMessage()),
null).withConfigProperty(Config.NAME_SCHEMA).withStacktrace(e.getStackTrace());
}

// Check if jexl pre-condition is not null or empty and if so compile expression.
Expand All @@ -265,7 +272,9 @@ && checkPreconditionNotEmpty(false)) {
try {
new Precondition(config.getPreconditionJEXL());
} catch (PreconditionException e) {
collector.addFailure(e.getMessage(), null).withConfigProperty(Config.NAME_PRECONDITION);
collector.addFailure(String.format("Error compiling precondition expression, %s: %s",
e.getClass().getName(), e.getMessage()), null)
.withConfigProperty(Config.NAME_PRECONDITION);
}
}
}
Expand All @@ -276,8 +285,11 @@ && checkPreconditionNotEmpty(false)) {
}

} catch (Exception e) {
LOG.error(e.getMessage());
collector.addFailure("Error occurred : " + e.getMessage(), null).withStacktrace(e.getStackTrace());
LOG.error("Error occurred during configuration of the plugin, {}: {}", e.getClass().getName(),
e.getMessage());
collector.addFailure(
String.format("Error occurred during configuration of the plugin, %s: %s",
e.getClass().getName(), e.getMessage()), null).withStacktrace(e.getStackTrace());
}
}

Expand Down Expand Up @@ -319,7 +331,12 @@ public void prepareRun(StageSubmitterContext context) throws Exception {
// Parse the recipe and extract all the instances of directives
// to be processed for extracting lineage.
RecipeParser recipe = getRecipeParser(context);
List<Directive> directives = recipe.parse();
List<Directive> directives;
try {
directives = recipe.parse();
} catch (RecipeException e) {
throw WranglerUtil.getProgramFailureExceptionDetailsFromChain(e, null, ErrorType.SYSTEM, false);
}
emitDirectiveMetrics(directives, context.getMetrics());

LineageOperations lineageOperations = new LineageOperations(input, output, directives);
Expand All @@ -345,10 +362,11 @@ public void initialize(TransformContext context) throws Exception {
try {
oSchema = Schema.parseJson(config.schema);
} catch (IOException e) {
throw new IllegalArgumentException(
String.format("Stage:%s - Format of output schema specified is invalid. Please check the format.",
context.getStageName()), e
);
String errorMessage = String.format("Error in stage '%s'. Format of output schema specified "
+ "is invalid. Please check the format. %s: %s", context.getStageName(),
e.getClass().getName(), e.getMessage());
throw WranglerUtil.getProgramFailureExceptionDetailsFromChain(
new IllegalArgumentException(errorMessage, e), errorMessage, ErrorType.USER, false);
}

// Check if jexl pre-condition is not null or empty and if so compile expression.
Expand All @@ -358,7 +376,7 @@ && checkPreconditionNotEmpty(false)) {
try {
condition = new Precondition(config.getPreconditionJEXL());
} catch (PreconditionException e) {
throw new IllegalArgumentException(e.getMessage(), e);
throw WranglerUtil.getProgramFailureExceptionDetailsFromChain(e, null, ErrorType.SYSTEM, false);
}
}
}
Expand All @@ -367,7 +385,11 @@ && checkPreconditionNotEmpty(false)) {
// Create the pipeline executor with context being set.
pipeline = new RecipePipelineExecutor(recipe, ctx);
} catch (Exception e) {
throw new Exception(String.format("Stage:%s - %s", getContext().getStageName(), e.getMessage()), e);
String errorMessage = String.format(
"Error in stage '%s'. Please check the configuration or input data. %s: %s",
context.getStageName(), e.getClass().getName(), e.getMessage());
throw WranglerUtil.getProgramFailureExceptionDetailsFromChain(e, errorMessage,
ErrorType.SYSTEM, false);
}

String defaultStrategy = context.getArguments().get(ERROR_STRATEGY_DEFAULT);
Expand Down Expand Up @@ -437,8 +459,10 @@ && checkPreconditionNotEmpty(false)) {
}
if (WRANGLER_FAIL_PIPELINE_FOR_ERROR.isEnabled(getContext())
&& onErrorStrategy.equalsIgnoreCase(ON_ERROR_FAIL_PIPELINE)) {
throw new Exception(
String.format("Errors in Wrangler Transformation - %s", errorMessages));
String errorReason = String.format("Errors in Wrangler Transformation - %s",
errorMessages);
throw WranglerUtil.getProgramFailureExceptionDetailsFromChain(null, errorReason,
ErrorType.SYSTEM, true);
}
}
} catch (Exception e) {
Expand All @@ -457,8 +481,10 @@ && checkPreconditionNotEmpty(false)) {
getContext().getStageName(), e.getMessage()),
"value", String.valueOf(errorCounter)
));
throw new Exception(String.format("Stage:%s - Failing pipeline due to error : %s",
getContext().getStageName(), e.getMessage()), e);
String errorMessage = String.format("Pipeline failed at stage:%s, %s: %s",
getContext().getStageName(), e.getClass().getName(), e.getMessage());
throw WranglerUtil.getProgramFailureExceptionDetailsFromChain(e, errorMessage,
ErrorType.SYSTEM, true);
}
// If it's 'skip-on-error' we continue processing and don't emit any error records.
return;
Expand Down Expand Up @@ -553,18 +579,32 @@ private boolean checkPreconditionNotEmpty(Boolean isConditionSQL) {
* @throws DirectiveLoadException
* @throws DirectiveParseException
*/
private RecipeParser getRecipeParser(StageContext context)
throws DirectiveLoadException, DirectiveParseException {
private RecipeParser getRecipeParser(StageContext context) {

registry = new CompositeDirectiveRegistry(SystemDirectiveRegistry.INSTANCE, new UserDirectiveRegistry(context));
registry.reload(context.getNamespace());
try {
registry.reload(context.getNamespace());
} catch (DirectiveLoadException e) {
LOG.error("Failed to reload the directive registry for namespace "
+ "'{}' at stage '{}'. Please verify the namespace and ensure the directives are "
+ "correctly configured. {}: {}", context.getNamespace(), context.getStageName(),
e.getClass().getName(), e.getMessage());
throw WranglerUtil.getProgramFailureExceptionDetailsFromChain(e, null, ErrorType.USER, false);
}

String directives = config.getDirectives();
if (config.getUDDs() != null && !config.getUDDs().trim().isEmpty()) {
directives = String.format("#pragma load-directives %s;%s", config.getUDDs(), config.getDirectives());
}

return new GrammarBasedParser(context.getNamespace(), new MigrateToV2(directives).migrate(), registry);
try {
return new GrammarBasedParser(context.getNamespace(), new MigrateToV2(directives).migrate(), registry);
} catch (DirectiveParseException e) {
LOG.error("Failed to parse directives for namespace '{}' at stage "
+ "'{}'. Please verify the directives and ensure they are correctly formatted. {}, {}",
context.getNamespace(), context.getStageName(), e.getClass().getName(), e.getMessage());
throw WranglerUtil.getProgramFailureExceptionDetailsFromChain(e, null, ErrorType.USER, false);
}
}

@Override
Expand All @@ -573,7 +613,8 @@ public Relation transform(RelationalTranformContext relationalTranformContext, R
&& checkPreconditionNotEmpty(true)) {

if (!Feature.WRANGLER_PRECONDITION_SQL.isEnabled(relationalTranformContext)) {
throw new RuntimeException("SQL Precondition feature is not available");
throw WranglerUtil.getProgramFailureExceptionDetailsFromChain(null,
"SQL Precondition feature is not available", ErrorType.SYSTEM, true);
}

Optional<ExpressionFactory<String>> expressionFactory = getExpressionFactory(relationalTranformContext);
Expand All @@ -598,11 +639,18 @@ private Optional<ExpressionFactory<String>> getExpressionFactory(RelationalTranf
* @param directives a list of Wrangler directives
* @param metrics CDAP {@link Metrics} object using which metrics can be emitted
*/
private void emitDirectiveMetrics(List<Directive> directives, Metrics metrics) throws DirectiveLoadException {
private void emitDirectiveMetrics(List<Directive> directives, Metrics metrics) {
for (Directive directive : directives) {
// skip emitting metrics if the directive is not system directive
if (registry.get(Contexts.SYSTEM, directive.define().getDirectiveName()) == null) {
continue;
try {
if (registry.get(Contexts.SYSTEM, directive.define().getDirectiveName()) == null) {
continue;
}
} catch (DirectiveLoadException e) {
LOG.error("Error loading system directive '{}'. {}: {}",
directive.define().getDirectiveName(), e.getClass().getName(), e.getMessage());
throw WranglerUtil.getProgramFailureExceptionDetailsFromChain(e, null, ErrorType.SYSTEM,
false);
}
List<EntityCountMetric> countMetrics = new ArrayList<>();

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Copyright © 2025 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package io.cdap.wrangler;

import com.google.common.base.Throwables;
import io.cdap.cdap.api.exception.ErrorCategory;
import io.cdap.cdap.api.exception.ErrorType;
import io.cdap.cdap.api.exception.ErrorUtils;
import io.cdap.cdap.api.exception.ProgramFailureException;
import io.cdap.wrangler.api.DirectiveLoadException;
import io.cdap.wrangler.api.DirectiveParseException;
import io.cdap.wrangler.api.RecipeException;
import java.util.List;

/**
* Util file to handle exceptions caught in Wrangler plugin
*/
public class WranglerUtil {

private WranglerUtil() {
throw new IllegalStateException("Utility class");
}

public static ProgramFailureException getProgramFailureExceptionDetailsFromChain(Throwable e,
String errorMessage, ErrorType errorType, boolean dependency) {
if (e != null) {
List<Throwable> causalChain = Throwables.getCausalChain(e);
for (Throwable t : causalChain) {
if (t instanceof ProgramFailureException) {
// Avoid double wrap
return (ProgramFailureException) t;
}
if (t instanceof DirectiveLoadException || t instanceof DirectiveParseException
|| t instanceof RecipeException || t instanceof PreconditionException) {
return getProgramFailureException((Exception) t, errorType, dependency);
}
}
}
// If no predefined exception found in the causal chain, return generic program failure exception
return ErrorUtils.getProgramFailureException(
new ErrorCategory(ErrorCategory.ErrorCategoryEnum.PLUGIN), errorMessage, errorMessage,
errorType, dependency, e);
}

/**
* Get a ProgramFailureException with the given error information from {@link Exception}.
*
* @param exception The Exception to get the error information from.
* @param errorType The ErrorType to get the error type information.
* @param dependency The dependency to show if it depends on external source or not
* @return A ProgramFailureException with the given error information.
*/
private static ProgramFailureException getProgramFailureException(Exception exception,
ErrorType errorType, boolean dependency) {
String errorMessage = exception.getMessage();
return ErrorUtils.getProgramFailureException(
new ErrorCategory(ErrorCategory.ErrorCategoryEnum.PLUGIN), errorMessage, errorMessage,
errorType, dependency, exception);
}

}

0 comments on commit 1613e69

Please sign in to comment.