Skip to content

Commit dada46e

Browse files
committed
Add module for xtable rest service
1 parent a39988e commit dada46e

18 files changed

+885
-0
lines changed

pom.xml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
<module>xtable-utilities</module>
5454
<module>xtable-aws</module>
5555
<module>xtable-hive-metastore</module>
56+
<module>xtable-service</module>
5657
</modules>
5758

5859
<properties>
@@ -97,6 +98,10 @@
9798
<apache-jar-resource-bundle.version>1.7</apache-jar-resource-bundle.version>
9899
<apache-incubator-disclaimer-resource-bundle.version>1.7</apache-incubator-disclaimer-resource-bundle.version>
99100
<scala-collection-compat.version>2.12.0</scala-collection-compat.version>
101+
<quarkus.platform.artifact-id>quarkus-bom</quarkus.platform.artifact-id>
102+
<quarkus.platform.group-id>io.quarkus.platform</quarkus.platform.group-id>
103+
<quarkus.platform.version>3.2.12.Final</quarkus.platform.version> <!-- compatible with Java 11 -->
104+
<antlr4.version>4.9.3</antlr4.version> <!-- added to resolve dep conflict with service-->
100105

101106
<!-- Test properties -->
102107
<skipTests>false</skipTests>
@@ -352,6 +357,16 @@
352357
<version>${spark.version}</version>
353358
<scope>provided</scope>
354359
</dependency>
360+
<dependency>
361+
<groupId>org.antlr</groupId>
362+
<artifactId>antlr4-runtime</artifactId>
363+
<version>${antlr4.version}</version>
364+
</dependency>
365+
<dependency>
366+
<groupId>org.scala-lang</groupId>
367+
<artifactId>scala-reflect</artifactId>
368+
<version>${scala.version}</version>
369+
</dependency>
355370

356371
<dependency>
357372
<groupId>commons-cli</groupId>
@@ -594,6 +609,13 @@
594609
<artifactId>jettison</artifactId>
595610
<version>1.5.4</version>
596611
</dependency>
612+
<dependency>
613+
<groupId>${quarkus.platform.group-id}</groupId>
614+
<artifactId>${quarkus.platform.artifact-id}</artifactId>
615+
<version>${quarkus.platform.version}</version>
616+
<type>pom</type>
617+
<scope>import</scope>
618+
</dependency>
597619
</dependencies>
598620
</dependencyManagement>
599621

xtable-service/README.md

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
<!--
2+
- Licensed to the Apache Software Foundation (ASF) under one
3+
- or more contributor license agreements. See the NOTICE file
4+
- distributed with this work for additional information
5+
- regarding copyright ownership. The ASF licenses this file
6+
- to you under the Apache License, Version 2.0 (the
7+
- "License"); you may not use this file except in compliance
8+
- with the License. You may obtain a copy of the License at
9+
-
10+
- http://www.apache.org/licenses/LICENSE-2.0
11+
-
12+
- Unless required by applicable law or agreed to in writing,
13+
- software distributed under the License is distributed on an
14+
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
- KIND, either express or implied. See the License for the
16+
- specific language governing permissions and limitations
17+
- under the License.
18+
-->
19+
20+
# XTable REST Service
21+
22+
The `rest-service-open-api.yaml` defines the API contract for running table format conversion using XTable's REST service.
23+
See XTable's `spec` module for more details: https://github.com/apache/incubator-xtable/tree/main/spec
24+
25+
## How to run the service locally
26+
27+
#### Before running the service, ensure that the credentials needed to read from and write to cloud storage are set in your environment.
28+
29+
To run the service locally, first ensure you have built the project with
30+
```sh
31+
mvn clean install -DskipTests
32+
```
33+
34+
35+
Then you can start the Quarkus service using the following command:
36+
```sh
37+
mvn quarkus:dev -pl xtable-service
38+
```
39+
This will start the service on `http://localhost:8080`.
40+
41+
Note that Quarkus will automatically reload the service when you make changes to the code.
42+
43+
## Testing with Postman
44+
45+
If you would like to test the service with an api client, you can download Postman https://www.postman.com/downloads/
46+
47+
When testing, ensure that you have set the service URL, headers, and request body correctly.
48+
See the screenshots below for an example.
49+
50+
![Screenshot 2025-05-01 at 9.04.59 AM.png](examples/Screenshot%202025-05-01%20at%209.04.59%E2%80%AFAM.png)
51+
52+
![Screenshot 2025-05-01 at 9.05.10 AM.png](examples/Screenshot%202025-05-01%20at%209.05.10%E2%80%AFAM.png)
271 KB
Loading
282 KB
Loading

xtable-service/pom.xml

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
<!--
 ~ Licensed to the Apache Software Foundation (ASF) under one or more
 ~ contributor license agreements. See the NOTICE file distributed with
 ~ this work for additional information regarding copyright ownership.
 ~ The ASF licenses this file to You under the Apache License, Version 2.0
 ~ (the "License"); you may not use this file except in compliance with
 ~ the License. You may obtain a copy of the License at
 ~
 ~ http://www.apache.org/licenses/LICENSE-2.0
 ~
 ~ Unless required by applicable law or agreed to in writing, software
 ~ distributed under the License is distributed on an "AS IS" BASIS,
 ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ~ See the License for the specific language governing permissions and
 ~ limitations under the License.
-->
<!-- Maven module for the XTable REST service: a Quarkus application that
     exposes table-format conversion over HTTP. Versions for the Quarkus
     artifacts come from the quarkus-bom imported in the parent POM's
     dependencyManagement (quarkus.platform.* properties). -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.apache.xtable</groupId>
        <artifactId>xtable</artifactId>
        <version>0.2.0-SNAPSHOT</version>
    </parent>

    <artifactId>xtable-service</artifactId>

    <dependencies>
        <!-- XTable conversion engine used by the service layer -->
        <dependency>
            <groupId>org.apache.xtable</groupId>
            <artifactId>xtable-core_${scala.binary.version}</artifactId>
            <version>${project.version}</version>
        </dependency>

        <!-- S3 filesystem support for reading/writing table data -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-aws</artifactId>
        </dependency>

        <!-- Spark -->
        <!-- provided: supplied by the runtime environment, not bundled -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_${scala.binary.version}</artifactId>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_${scala.binary.version}</artifactId>
            <scope>provided</scope>
        </dependency>

        <!-- Jackson for JSON (de)serialization of request/response models -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-annotations</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>${jackson.version}</version>
        </dependency>

        <!-- Quarkus runtime: CDI container and reactive REST endpoints -->
        <dependency>
            <groupId>io.quarkus</groupId>
            <artifactId>quarkus-arc</artifactId>
        </dependency>
        <dependency>
            <groupId>io.quarkus</groupId>
            <artifactId>quarkus-resteasy-reactive</artifactId>
        </dependency>
        <dependency>
            <groupId>io.quarkus</groupId>
            <artifactId>quarkus-resteasy-reactive-jackson</artifactId>
        </dependency>
        <!-- Test dependencies -->
        <dependency>
            <groupId>io.quarkus</groupId>
            <artifactId>quarkus-junit5</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>io.rest-assured</groupId>
            <artifactId>rest-assured</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Quarkus build: code generation and application packaging -->
            <plugin>
                <groupId>${quarkus.platform.group-id}</groupId>
                <artifactId>quarkus-maven-plugin</artifactId>
                <version>${quarkus.platform.version}</version>
                <extensions>true</extensions>
                <executions>
                    <execution>
                        <goals>
                            <goal>build</goal>
                            <goal>generate-code</goal>
                            <goal>generate-code-tests</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>


</project>
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.xtable.service;
20+
21+
import org.apache.xtable.service.models.ConvertTableRequest;
22+
import org.apache.xtable.service.models.ConvertTableResponse;
23+
24+
import io.smallrye.common.annotation.Blocking;
25+
import jakarta.inject.Inject;
26+
import jakarta.ws.rs.Consumes;
27+
import jakarta.ws.rs.POST;
28+
import jakarta.ws.rs.Path;
29+
import jakarta.ws.rs.Produces;
30+
import jakarta.ws.rs.core.MediaType;
31+
32+
@Path("/v1/conversion")
33+
@Produces(MediaType.APPLICATION_JSON)
34+
@Consumes(MediaType.APPLICATION_JSON)
35+
public class ConversionResource {
36+
37+
@Inject ConversionService conversionService;
38+
39+
@POST
40+
@Path("/table")
41+
@Blocking
42+
public ConvertTableResponse runSync(ConvertTableRequest req) {
43+
return conversionService.runSync(req);
44+
}
45+
}
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.xtable.service;

import static org.apache.xtable.model.storage.TableFormat.DELTA;
import static org.apache.xtable.model.storage.TableFormat.HUDI;
import static org.apache.xtable.model.storage.TableFormat.ICEBERG;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.table.timeline.HoodieInstant;

import org.apache.iceberg.BaseTable;
import org.apache.iceberg.SchemaParser;
import org.apache.iceberg.Snapshot;

import org.apache.iceberg.Table;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.xtable.conversion.ConversionConfig;
import org.apache.xtable.conversion.ConversionController;
import org.apache.xtable.conversion.ConversionSourceProvider;
import org.apache.xtable.conversion.SourceTable;
import org.apache.xtable.conversion.TargetTable;
import org.apache.xtable.delta.DeltaConversionSourceProvider;
import org.apache.xtable.hudi.HudiConversionSourceProvider;
import org.apache.xtable.iceberg.IcebergConversionSourceProvider;
import org.apache.xtable.service.models.ConvertTableRequest;
import org.apache.xtable.service.models.ConvertTableResponse;
import org.apache.xtable.service.models.InternalTable;
import org.apache.xtable.service.spark.SparkHolder;

import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;

/**
 * Application-scoped service that runs XTable table-format conversions on
 * behalf of the REST layer ({@code ConversionResource}).
 *
 * <p>The Hadoop {@link Configuration} used throughout comes from the injected
 * {@link SparkHolder}'s JavaSparkContext.
 */
@ApplicationScoped
public class ConversionService {
  // Provides the shared Spark context whose hadoopConfiguration() is used
  // for source providers, the conversion controller, and metadata reads.
  @Inject SparkHolder sparkHolder;

  /**
   * Synchronously converts the request's source table into each requested
   * target format, then reads back Iceberg metadata from the source path to
   * build the response.
   *
   * <p>NOTE(review): the response always reports format "ICEBERG" and reads
   * Iceberg table metadata from the source table path, regardless of which
   * target formats were requested (e.g. DELTA or HUDI). Presumably this is a
   * first-cut limitation — confirm intended behavior for non-Iceberg targets.
   *
   * @param request source table name/path/format plus the target format list
   * @return a response wrapping a single {@link InternalTable} entry
   */
  public ConvertTableResponse runSync(ConvertTableRequest request) {
    ConversionController conversionController =
        new ConversionController(sparkHolder.jsc().hadoopConfiguration());
    // Describe the source table to convert from.
    SourceTable sourceTable =
        SourceTable.builder()
            .name(request.getSourceTableName())
            .basePath(request.getSourceTablePath())
            .formatName(request.getSourceFormat())
            .build();

    // One TargetTable per requested output format; targets share the source
    // table's name and base path (conversion happens in place).
    List<TargetTable> targetTables = new ArrayList<>();
    for (String targetFormat : request.getTargetFormats()) {
      TargetTable targetTable =
          TargetTable.builder()
              .name(request.getSourceTableName())
              .basePath(request.getSourceTablePath())
              .formatName(targetFormat)
              .build();
      targetTables.add(targetTable);
    }
    ConversionConfig conversionConfig =
        ConversionConfig.builder()
            .sourceTable(sourceTable)
            .targetTables(targetTables)
            .build();
    ConversionSourceProvider<?> conversionSourceProvider =
        getConversionSourceProvider(request.getSourceFormat());
    // Performs the actual (blocking) conversion.
    conversionController.sync(conversionConfig, conversionSourceProvider);

    // Left = metadata file location, right = schema JSON (see the note on
    // getIcebergSchemaAndMetadataPath below regarding this ordering).
    Pair<String, String> responseFields = getIcebergSchemaAndMetadataPath(request.getSourceTablePath(), sparkHolder.jsc().hadoopConfiguration());

    // NOTE(review): InternalTable's constructor parameter order is not
    // visible here — verify that it expects (format, metadataPath, schema)
    // to match the pair ordering above.
    InternalTable internalTable =
        new InternalTable(
            "ICEBERG",
            responseFields.getLeft(), responseFields.getRight());
    return new ConvertTableResponse(Collections.singletonList(internalTable));
  }

  /**
   * Returns an initialized source provider for the given table format.
   *
   * <p>Format comparison is case-insensitive against the {@code TableFormat}
   * constants HUDI, DELTA, and ICEBERG.
   *
   * @param sourceTableFormat format name from the request
   * @throws IllegalArgumentException for any unsupported format string
   */
  private ConversionSourceProvider<?> getConversionSourceProvider(String sourceTableFormat) {
    if (sourceTableFormat.equalsIgnoreCase(HUDI)) {
      ConversionSourceProvider<HoodieInstant> hudiConversionSourceProvider =
          new HudiConversionSourceProvider();
      hudiConversionSourceProvider.init(sparkHolder.jsc().hadoopConfiguration());
      return hudiConversionSourceProvider;
    } else if (sourceTableFormat.equalsIgnoreCase(DELTA)) {
      ConversionSourceProvider<Long> deltaConversionSourceProvider =
          new DeltaConversionSourceProvider();
      deltaConversionSourceProvider.init(sparkHolder.jsc().hadoopConfiguration());
      return deltaConversionSourceProvider;
    } else if (sourceTableFormat.equalsIgnoreCase(ICEBERG)) {
      ConversionSourceProvider<Snapshot> icebergConversionSourceProvider =
          new IcebergConversionSourceProvider();
      icebergConversionSourceProvider.init(sparkHolder.jsc().hadoopConfiguration());
      return icebergConversionSourceProvider;
    } else {
      throw new IllegalArgumentException("Unsupported source format: " + sourceTableFormat);
    }
  }

  /**
   * Loads the Iceberg table at {@code tableLocation} via {@link HadoopTables}
   * and returns its current metadata file location and schema JSON.
   *
   * <p>NOTE(review): despite the name suggesting (schema, metadataPath), the
   * returned pair is actually (metadataFileLocation, schemaJson) — left is
   * the metadata path, right is the schema. Callers must not assume the
   * order implied by the method name; consider renaming or reordering.
   *
   * @param tableLocation base path of the Iceberg table
   * @param conf Hadoop configuration used to access storage
   * @return pair of (metadata file location, schema as JSON)
   */
  public static Pair<String, String> getIcebergSchemaAndMetadataPath(String tableLocation, Configuration conf) {
    HadoopTables tables = new HadoopTables(conf);
    Table table = tables.load(tableLocation);
    TableOperations ops = ((BaseTable) table).operations();
    TableMetadata current = ops.current();
    return Pair.of(current.metadataFileLocation(), SchemaParser.toJson(current.schema()));
  }

}

0 commit comments

Comments
 (0)