substrait-io
diff --git a/‎examples/isthmus-api/.gitignore‎
Lines changed: 4 additions & 0 deletions b/‎examples/isthmus-api/.gitignore‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎examples/isthmus-api/README.md‎
Lines changed: 70 additions & 0 deletions b/‎examples/isthmus-api/README.md‎
Lines changed: 70 additions & 0 deletions
diff --git a/‎examples/isthmus-api/build.gradle.kts‎
Lines changed: 32 additions & 0 deletions b/‎examples/isthmus-api/build.gradle.kts‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎examples/isthmus-api/src/main/java/io/substrait/examples/FromSql.java‎
Lines changed: 103 additions & 0 deletions b/‎examples/isthmus-api/src/main/java/io/substrait/examples/FromSql.java‎
Lines changed: 103 additions & 0 deletions
diff --git a/‎examples/isthmus-api/src/main/java/io/substrait/examples/IsthmusAppExamples.java‎
Lines changed: 53 additions & 0 deletions b/‎examples/isthmus-api/src/main/java/io/substrait/examples/IsthmusAppExamples.java‎
Lines changed: 53 additions & 0 deletions
diff --git a/‎examples/isthmus-api/src/main/java/io/substrait/examples/SchemaHelper.java‎
Lines changed: 39 additions & 0 deletions b/‎examples/isthmus-api/src/main/java/io/substrait/examples/SchemaHelper.java‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎examples/isthmus-api/src/main/java/io/substrait/examples/ToSql.java‎
Lines changed: 79 additions & 0 deletions b/‎examples/isthmus-api/src/main/java/io/substrait/examples/ToSql.java‎
Lines changed: 79 additions & 0 deletions
@@ -0,0 +1,4 @@
+_apps
+_data
+**/*/bin
+build
@@ -0,0 +1,70 @@
+# Isthmus API Examples
+
+The Isthmus library converts Substrait plans to and from SQL. There are two examples showing conversion in each direction.
+
+## How does this work in theory?
+
+In both cases, the Calcite library is used to parse and generate the SQL string. Calcite has its own relational object model, so there are classes within Isthmus to convert Substrait to and from Calcite's object model.
+
+Converting to Substrait from SQL will use Calcite to parse the SQL to an object model, and then it will be converted to Substrait.
+
+Converting from Substrait to SQL involves converting Substrait to Calcite's object model, then asking Calcite to generate SQL strings.
+
+## Running the examples
+
+There are 2 example classes:
+
+- [FromSql](./src/main/java/io/substrait/examples/FromSql.java) that creates a plan starting from SQL
+- [ToSql](./src/main/java/io/substrait/examples/ToSql.java) that reads a plan and creates the SQL
+
+
+### Requirements
+
+To run these you will need:
+
+- Java 17 or greater
+- [Two datafiles](./app/src/main/resources/) are provided for the sample data
+
+
+## Creating a Substrait Plan from SQL
+
+To run [`FromSql.java`](./src/main/java/io/substrait/examples/FromSql.java) from the root of this repository, use the command below. `substrait.plan` is the name of the file written.
+
+```bash
+ ./gradlew examples:isthmus-api:run --args "FromSql substrait.plan"
+> Task :examples:isthmus-api:run
+Plan{version=Version{major=0, minor=77, patch=0, producer=isthmus}, roots=[Root{input=Sort{input=Aggregate{input=Project{remap=Remap{indices=[15]}, input=Filter{input=Join{left=NamedScan{initialSchema=NamedStruct{struct=Struct{nullable=false, fields=[VarChar{nullable=true, length=15}, VarChar{nullable=true, length=40}, VarChar{nullable=true, length=40}, VarChar{nullable=true, length=15}, VarChar{nullable=true, length=15}, I32{nullable=true}, VarChar{nullable=true, length=15}]}, names=[vehicle_id, make, model, colour, fuel_type, cylinder_capacity, first_use_date]}, names=[vehicles]}, right=NamedScan{initialSchema=NamedStruct{struct=Struct{nullable=false, fields=[VarChar{nullable=true, length=15}, VarChar{nullable=true, length=15}, VarChar{nullable=true, length=20}, VarChar{nullable=true, length=20}, VarChar{nullable=true, length=20}, VarChar{nullable=true, length=15}, I32{nullable=true}, VarChar{nullable=true, length=15}]}, names=[test_id, vehicle_id, test_date, test_class, test_type, test_result, test_mileage, postcode_area]}, names=[tests]}, condition=ScalarFunctionInvocation{declaration=equal:any_any, arguments=[FieldReference{segments=[StructField{offset=0}], type=VarChar{nullable=true, length=15}}, FieldReference{segments=[StructField{offset=8}], type=VarChar{nullable=true, length=15}}], options=[], outputType=Bool{nullable=true}}, joinType=INNER}, condition=ScalarFunctionInvocation{declaration=equal:any_any, arguments=[FieldReference{segments=[StructField{offset=12}], type=VarChar{nullable=true, length=15}}, VarCharLiteral{nullable=false, value=P, length=15}], options=[], outputType=Bool{nullable=true}}}, expressions=[FieldReference{segments=[StructField{offset=3}], type=VarChar{nullable=true, length=15}}]}, groupings=[Grouping{expressions=[FieldReference{segments=[StructField{offset=0}], type=VarChar{nullable=true, length=15}}]}], measures=[Measure{function=AggregateFunctionInvocation{declaration=count:, arguments=[], options=[], 
aggregationPhase=INITIAL_TO_RESULT, sort=[], outputType=I64{nullable=false}, invocation=ALL}}]}, sortFields=[SortField{expr=FieldReference{segments=[StructField{offset=1}], type=Struct{nullable=false, fields=[VarChar{nullable=true, length=15}, I64{nullable=false}]}}, direction=ASC_NULLS_LAST}]}, names=[COLOUR, COLOURCOUNT]}], expectedTypeUrls=[]}
+File written to substrait.plan
+```
+
+The plan is written as a binary file, so to check that it was written out, list it:
+```bash
+ls -l examples/isthmus-api/substrait.plan
+-rw-r--r-- 1 matthew matthew 808 Dec  1 12:05 examples/isthmus-api/substrait.plan
+```
+
+Please see the code comments for details of how the conversion is done.
+
+## Creating SQL from a Substrait Plan
+
+To run [`ToSql.java`](./src/main/java/io/substrait/examples/ToSql.java) from the root of this repository, use the command below.
+`substrait.plan` is the name of the file to be read - and will probably be the one created with `FromSql`.
+
+```bash
+./gradlew examples:isthmus-api:run --args "ToSql substrait.plan"
+
+> Task :examples:isthmus-api:run
+Reading from substrait.plan
+Plan{version=Version{major=0, minor=77, patch=0, producer=isthmus}, roots=[Root{input=Sort{input=Aggregate{input=Project{remap=Remap{indices=[15]}, input=Filter{input=Join{left=NamedScan{initialSchema=NamedStruct{struct=Struct{nullable=false, fields=[VarChar{nullable=true, length=15}, VarChar{nullable=true, length=40}, VarChar{nullable=true, length=40}, VarChar{nullable=true, length=15}, VarChar{nullable=true, length=15}, I32{nullable=true}, VarChar{nullable=true, length=15}]}, names=[vehicle_id, make, model, colour, fuel_type, cylinder_capacity, first_use_date]}, names=[vehicles]}, right=NamedScan{initialSchema=NamedStruct{struct=Struct{nullable=false, fields=[VarChar{nullable=true, length=15}, VarChar{nullable=true, length=15}, VarChar{nullable=true, length=20}, VarChar{nullable=true, length=20}, VarChar{nullable=true, length=20}, VarChar{nullable=true, length=15}, I32{nullable=true}, VarChar{nullable=true, length=15}]}, names=[test_id, vehicle_id, test_date, test_class, test_type, test_result, test_mileage, postcode_area]}, names=[tests]}, condition=ScalarFunctionInvocation{declaration=equal:any_any, arguments=[FieldReference{segments=[StructField{offset=0}], type=VarChar{nullable=true, length=15}}, FieldReference{segments=[StructField{offset=8}], type=VarChar{nullable=true, length=15}}], options=[], outputType=Bool{nullable=true}}, joinType=INNER}, condition=ScalarFunctionInvocation{declaration=equal:any_any, arguments=[FieldReference{segments=[StructField{offset=12}], type=VarChar{nullable=true, length=15}}, VarCharLiteral{nullable=false, value=P, length=15}], options=[], outputType=Bool{nullable=true}}}, expressions=[FieldReference{segments=[StructField{offset=3}], type=VarChar{nullable=true, length=15}}]}, groupings=[Grouping{expressions=[FieldReference{segments=[StructField{offset=0}], type=VarChar{nullable=true, length=15}}]}], measures=[Measure{function=AggregateFunctionInvocation{declaration=count:, arguments=[], options=[], 
aggregationPhase=INITIAL_TO_RESULT, sort=[], outputType=I64{nullable=false}, invocation=ALL}}]}, sortFields=[SortField{expr=FieldReference{segments=[StructField{offset=1}], type=I64{nullable=false}}, direction=ASC_NULLS_LAST}]}, names=[COLOUR, COLOURCOUNT]}], expectedTypeUrls=[]}
+
+SELECT `t2`.`colour0` AS `COLOUR`, `t2`.`$f1` AS `COLOURCOUNT`
+FROM (SELECT `vehicles`.`colour` AS `colour0`, COUNT(*) AS `$f1`
+FROM `vehicles`
+INNER JOIN `tests` ON `vehicles`.`vehicle_id` = `tests`.`vehicle_id`
+WHERE `tests`.`test_result` = 'P'
+GROUP BY `vehicles`.`colour`
+ORDER BY COUNT(*) IS NULL, 2) AS `t2`
+
+```
+
+The SQL statement is generated in the selected dialect (MySQL is used in the example).
@@ -0,0 +1,32 @@
+plugins {
+  // Apply the application plugin to add support for building a CLI application in Java.
+  id("application")
+  alias(libs.plugins.spotless)
+  id("substrait.java-conventions")
+}
+
+repositories {
+  // Use Maven Central for resolving dependencies.
+  mavenCentral()
+}
+
+dependencies {
+  implementation(project(":isthmus"))
+  implementation(libs.calcite.core)
+  implementation(libs.calcite.server)
+  // For a real Spark application these would already be on the Spark server classpath.
+  // NOTE(review): the example classes here do not appear to import Spark; confirm these are needed.
+  runtimeOnly(libs.spark.core)
+  runtimeOnly(libs.spark.hive)
+}
+
+application { mainClass = "io.substrait.examples.IsthmusAppExamples" } // entry point for `run`
+
+tasks.named<Test>("test") {
+  // Use JUnit Platform for unit tests.
+  useJUnitPlatform()
+}
+
+java { toolchain { languageVersion.set(JavaLanguageVersion.of(17)) } } // build with Java 17
+
+tasks.pmdMain { dependsOn(":core:shadowJar") } // TODO confirm why PMD needs core's shadow jar
@@ -0,0 +1,103 @@
+package io.substrait.examples;
+
+import io.substrait.examples.IsthmusAppExamples.Action;
+import io.substrait.isthmus.SqlToSubstrait;
+import io.substrait.isthmus.SubstraitTypeSystem;
+import io.substrait.isthmus.sql.SubstraitCreateStatementParser;
+import io.substrait.plan.Plan;
+import io.substrait.plan.PlanProtoConverter;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+import org.apache.calcite.config.CalciteConnectionConfig;
+import org.apache.calcite.config.CalciteConnectionProperty;
+import org.apache.calcite.jdbc.CalciteSchema;
+import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
+import org.apache.calcite.prepare.CalciteCatalogReader;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.sql.SqlDialect;
+import org.apache.calcite.sql.parser.SqlParseException;
+
+/**
+ * Substrait from SQL conversions.
+ *
+ * <p>There are 4 steps in the whole process.
+ *
+ * <p>1) A fully typed schema is required for the 'inputs'. Within a SQL context this is the `CREATE
+ * TABLE` commands; this needs to be converted to a Calcite Schema 2) The SQL query to convert (in
+ * one type of dialect) 3) Conversion of the SQL query to Calcite Relations 4) Conversion of the
+ * Calcite Relations to Substrait relations
+ *
+ * <p>Note that the schema could be created by other means, e.g. Calcite's reflection-based schema.
+ *
+ * <p>The Substrait plan can then be used as wished.
+ */
+public class FromSql implements Action {
+
+  @Override
+  public void run(final String[] args) {
+    try {
+      final String createSql =
+          """
+                    CREATE TABLE "vehicles" ("vehicle_id" varchar(15), "make" varchar(40), "model" varchar(40),
+                        "colour" varchar(15), "fuel_type" varchar(15),
+                        "cylinder_capacity" int, "first_use_date" varchar(15));
+
+                    CREATE TABLE "tests" ("test_id" varchar(15), "vehicle_id" varchar(15),
+                             "test_date" varchar(20), "test_class" varchar(20), "test_type" varchar(20),
+                             "test_result" varchar(15),"test_mileage" int, "postcode_area" varchar(15));
+
+                      """;
+
+      // Create the Calcite Schema from the CREATE TABLE statements
+      // as this is SQL it could be in a schema, but the Isthmus Helper classes here are assuming
+      // a common SQL format
+      final CalciteSchema calciteSchema = CalciteSchema.createRootSchema(false);
+      SubstraitCreateStatementParser.processCreateStatements(createSql)
+          .forEach(t -> calciteSchema.add(t.getName(), t));
+
+      // Type Factory based on Java Types
+      final RelDataTypeFactory typeFactory =
+          new JavaTypeFactoryImpl(SubstraitTypeSystem.TYPE_SYSTEM);
+
+      // Default configuration for Calcite, with case-insensitive matching
+      final CalciteConnectionConfig calciteDefaultConfig =
+          CalciteConnectionConfig.DEFAULT.set(
+              CalciteConnectionProperty.CASE_SENSITIVE, Boolean.FALSE.toString());
+
+      final CalciteCatalogReader catalogReader =
+          new CalciteCatalogReader(calciteSchema, List.of(), typeFactory, calciteDefaultConfig);
+
+      // Query that needs to be converted; again this could be in a variety of SQL dialects
+      final String query =
+          """
+          SELECT vehicles.colour, count(*) as colourcount FROM vehicles INNER JOIN tests
+              ON vehicles.vehicle_id=tests.vehicle_id WHERE tests.test_result = 'P'
+              GROUP BY vehicles.colour ORDER BY count(*)
+          """;
+      final SqlToSubstrait sqlToSubstrait = new SqlToSubstrait();
+
+      // choose Apache Derby as an example dialect
+      final SqlDialect dialect = SqlDialect.DatabaseProduct.DERBY.getDialect();
+      final Plan substraitPlan = sqlToSubstrait.convert(query, catalogReader, dialect);
+
+      System.out.println(substraitPlan);
+
+      // write out to file only when exactly one argument (the file name) is given
+      // convert to a protobuf byte array and write as binary file
+      if (args.length == 1) {
+        final PlanProtoConverter planToProto = new PlanProtoConverter();
+        final byte[] buffer = planToProto.toProto(substraitPlan).toByteArray();
+
+        final Path outputFile = Paths.get(args[0]);
+        Files.write(outputFile, buffer);
+        System.out.println("File written to " + outputFile);
+      }
+
+    } catch (SqlParseException | IOException e) {
+      e.printStackTrace();
+    }
+  }
+}
@@ -0,0 +1,53 @@
+package io.substrait.examples;
+
+import java.util.Arrays;
+
+/**
+ * Command-line entry point that dispatches to one of the example classes in this package.
+ *
+ * <p>The first argument is the simple class name of the example to run (for instance {@code
+ * FromSql} or {@code ToSql}); any remaining arguments are handed to the example unchanged.
+ */
+public final class IsthmusAppExamples {
+
+  /** Implemented by all examples. */
+  @FunctionalInterface
+  public interface Action {
+
+    /**
+     * Runs the example.
+     *
+     * @param args the command-line arguments that follow the example name; may be empty
+     */
+    void run(String[] args);
+  }
+
+  private IsthmusAppExamples() {}
+
+  /**
+   * Looks up the example class named by {@code args[0]} in this class's package, instantiates it
+   * via its no-argument constructor and runs it with the remaining arguments.
+   *
+   * <p>Exits with status -1 when no example name is given, or when the example cannot be loaded,
+   * instantiated or run.
+   *
+   * @param args the example's simple class name followed by the example's own arguments
+   */
+  public static void main(final String[] args) {
+    try {
+
+      if (args.length == 0) {
+        System.err.println(
+            "Please provide base classname of example to run. eg ToSql to run class io.substrait.examples.ToSql ");
+        System.exit(-1);
+      }
+      final String exampleClass = args[0];
+
+      // asSubclass performs a checked cast, so a class that does not implement Action is rejected
+      // with a ClassCastException before it is instantiated.
+      final Class<? extends Action> clz =
+          Class.forName(
+                  String.format("%s.%s", IsthmusAppExamples.class.getPackageName(), exampleClass))
+              .asSubclass(Action.class);
+      final Action action = clz.getDeclaredConstructor().newInstance();
+
+      // copyOfRange yields an empty array when only the example name was supplied
+      action.run(Arrays.copyOfRange(args, 1, args.length));
+    } catch (Exception e) {
+      e.printStackTrace();
+      System.exit(-1);
+    }
+  }
+}
@@ -0,0 +1,39 @@
+package io.substrait.examples;
+
+import io.substrait.isthmus.calcite.SubstraitTable;
+import io.substrait.isthmus.sql.SubstraitCreateStatementParser;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.calcite.jdbc.CalciteSchema;
+import org.apache.calcite.prepare.CalciteCatalogReader;
+import org.apache.calcite.sql.parser.SqlParseException;
+
+/** Utility methods for building Calcite schemas from SQL DDL. */
+public final class SchemaHelper {
+
+  private SchemaHelper() {}
+
+  /**
+   * Builds a root {@link CalciteSchema} holding every table declared by the given SQL strings.
+   *
+   * @param createStatements SQL strings, each containing only CREATE statements
+   * @return a root {@link CalciteSchema} with one entry per parsed table, keyed by table name
+   * @throws SqlParseException if any of the statements cannot be parsed
+   */
+  public static CalciteSchema processCreateStatementsToSchema(final List<String> createStatements)
+      throws SqlParseException {
+
+    final CalciteSchema schema = CalciteSchema.createRootSchema(false);
+
+    // Parse every statement first; tables are only registered once all of them have parsed.
+    final List<SubstraitTable> parsedTables = new ArrayList<>();
+    for (final String sql : createStatements) {
+      parsedTables.addAll(SubstraitCreateStatementParser.processCreateStatements(sql));
+    }
+
+    parsedTables.forEach(table -> schema.add(table.getName(), table));
+    return schema;
+  }
+}
@@ -0,0 +1,79 @@
+package io.substrait.examples;
+
+import io.substrait.examples.IsthmusAppExamples.Action;
+import io.substrait.extension.DefaultExtensionCatalog;
+import io.substrait.extension.SimpleExtension;
+import io.substrait.isthmus.SubstraitToCalcite;
+import io.substrait.isthmus.SubstraitTypeSystem;
+import io.substrait.plan.Plan;
+import io.substrait.plan.Plan.Root;
+import io.substrait.plan.ProtoPlanConverter;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.rel2sql.RelToSqlConverter;
+import org.apache.calcite.sql.SqlDialect;
+import org.apache.calcite.sql.SqlNode;
+
+/**
+ * Substrait to SQL conversions.
+ *
+ * <p>There are 3 steps in the whole process.
+ *
+ * <p>1) Load the plan into the protobuf object, and create the in-memory POJO from it. 2) Create a
+ * converter to map the Substrait to Calcite relations. This will need the type system to use and
+ * the collection of extensions. 3) Given configuration, convert the Calcite relational nodes to
+ * SQL statements.
+ *
+ * <p>It is possible to get multiple SQL statements from a single Substrait plan.
+ */
+public class ToSql implements Action {
+
+  /**
+   * Reads a binary Substrait plan file and prints one SQL statement for each root of the plan.
+   *
+   * @param args {@code args[0]} is the path of the protobuf-encoded plan file to read
+   * @throws IllegalArgumentException if no plan file is given
+   */
+  @Override
+  public void run(final String[] args) {
+
+    // Fail with a clear message instead of an ArrayIndexOutOfBoundsException on args[0]
+    if (args.length == 0) {
+      throw new IllegalArgumentException(
+          "ToSql requires the path of the Substrait plan file to read as its first argument");
+    }
+
+    try {
+
+      // Load the protobuf binary file into a Substrait Plan POJO
+      System.out.println("Reading from " + args[0]);
+      final byte[] buffer = Files.readAllBytes(Paths.get(args[0]));
+
+      final io.substrait.proto.Plan proto = io.substrait.proto.Plan.parseFrom(buffer);
+      final ProtoPlanConverter protoToPlan = new ProtoPlanConverter();
+      final Plan substraitPlan = protoToPlan.from(proto);
+
+      // output the plan for information
+      System.out.println(substraitPlan);
+
+      final SimpleExtension.ExtensionCollection extensions =
+          DefaultExtensionCatalog.DEFAULT_COLLECTION;
+      final SubstraitToCalcite converter =
+          new SubstraitToCalcite(
+              extensions, new JavaTypeFactoryImpl(SubstraitTypeSystem.TYPE_SYSTEM));
+
+      // Determine which SQL dialect we want the resultant queries to be in
+      final SqlDialect sqlDialect = SqlDialect.DatabaseProduct.MYSQL.getDialect();
+
+      // Create the Calcite relation to SQL converter
+      final RelToSqlConverter relToSql = new RelToSqlConverter(sqlDialect);
+      final List<String> sqlStrings = new ArrayList<>();
+
+      // Convert each root of the plan to a Calcite relation, then feed it into the SQL creation step
+      for (final Root root : substraitPlan.getRoots()) {
+        final RelNode calciteRelNode = converter.convert(root).project(true);
+        final SqlNode sqlNode = relToSql.visitRoot(calciteRelNode).asStatement();
+
+        final String sqlString = sqlNode.toSqlString(sqlDialect).getSql();
+        sqlStrings.add(sqlString);
+      }
+      sqlStrings.forEach(System.out::println);
+
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+}
-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +_apps
 +_data
 +**/*/bin
 +build