Skip to content

Commit 5192933

Browse files
authored
Merge branch 'main' into add_support_ansi_mode_avg_expr
2 parents bd83978 + bc05ef8 commit 5192933

File tree

519 files changed

+51238
-40976
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

519 files changed

+51238
-40976
lines changed
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
name: Setup Iceberg Builder
19+
description: 'Setup Apache Iceberg to run Spark SQL tests'
20+
inputs:
21+
iceberg-version:
22+
description: 'The Apache Iceberg version (e.g., 1.8.1) to build'
23+
required: true
24+
runs:
25+
using: "composite"
26+
steps:
27+
- name: Clone Iceberg repo
28+
uses: actions/checkout@v4
29+
with:
30+
repository: apache/iceberg
31+
path: apache-iceberg
32+
ref: apache-iceberg-${{inputs.iceberg-version}}
33+
fetch-depth: 1
34+
35+
- name: Setup Iceberg for Comet
36+
shell: bash
37+
run: |
38+
cd apache-iceberg
39+
git apply ../dev/diffs/iceberg-rust/${{inputs.iceberg-version}}.diff

.github/workflows/iceberg_spark_test.yml

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,3 +156,120 @@ jobs:
156156
ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ./gradlew -DsparkVersions=${{ matrix.spark-version.short }} -DscalaVersion=${{ matrix.scala-version }} -DflinkVersions= -DkafkaVersions= \
157157
:iceberg-spark:iceberg-spark-runtime-${{ matrix.spark-version.short }}_${{ matrix.scala-version }}:integrationTest \
158158
-Pquick=true -x javadoc
159+
160+
iceberg-spark-rust:
161+
if: contains(github.event.pull_request.title, '[iceberg]')
162+
strategy:
163+
matrix:
164+
os: [ubuntu-24.04]
165+
java-version: [11, 17]
166+
iceberg-version: [{short: '1.8', full: '1.8.1'}, {short: '1.9', full: '1.9.1'}, {short: '1.10', full: '1.10.0'}]
167+
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.7'}]
168+
scala-version: ['2.13']
169+
fail-fast: false
170+
name: iceberg-spark-rust/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
171+
runs-on: ${{ matrix.os }}
172+
container:
173+
image: amd64/rust
174+
env:
175+
SPARK_LOCAL_IP: localhost
176+
steps:
177+
- uses: actions/checkout@v5
178+
- name: Setup Rust & Java toolchain
179+
uses: ./.github/actions/setup-builder
180+
with:
181+
rust-version: ${{env.RUST_VERSION}}
182+
jdk-version: ${{ matrix.java-version }}
183+
- name: Build Comet
184+
shell: bash
185+
run: |
186+
PROFILES="-Pspark-${{matrix.spark-version.short}} -Pscala-${{matrix.scala-version}}" make release
187+
- name: Setup Iceberg
188+
uses: ./.github/actions/setup-iceberg-rust-builder
189+
with:
190+
iceberg-version: ${{ matrix.iceberg-version.full }}
191+
- name: Run Iceberg Spark tests (Rust)
192+
run: |
193+
cd apache-iceberg
194+
rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
195+
ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ./gradlew -DsparkVersions=${{ matrix.spark-version.short }} -DscalaVersion=${{ matrix.scala-version }} -DflinkVersions= -DkafkaVersions= \
196+
:iceberg-spark:iceberg-spark-${{ matrix.spark-version.short }}_${{ matrix.scala-version }}:test \
197+
-Pquick=true -x javadoc
198+
199+
iceberg-spark-extensions-rust:
200+
if: contains(github.event.pull_request.title, '[iceberg]')
201+
strategy:
202+
matrix:
203+
os: [ubuntu-24.04]
204+
java-version: [11, 17]
205+
iceberg-version: [{short: '1.8', full: '1.8.1'}, {short: '1.9', full: '1.9.1'}, {short: '1.10', full: '1.10.0'}]
206+
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.7'}]
207+
scala-version: ['2.13']
208+
fail-fast: false
209+
name: iceberg-spark-extensions-rust/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
210+
runs-on: ${{ matrix.os }}
211+
container:
212+
image: amd64/rust
213+
env:
214+
SPARK_LOCAL_IP: localhost
215+
steps:
216+
- uses: actions/checkout@v5
217+
- name: Setup Rust & Java toolchain
218+
uses: ./.github/actions/setup-builder
219+
with:
220+
rust-version: ${{env.RUST_VERSION}}
221+
jdk-version: ${{ matrix.java-version }}
222+
- name: Build Comet
223+
shell: bash
224+
run: |
225+
PROFILES="-Pspark-${{matrix.spark-version.short}} -Pscala-${{matrix.scala-version}}" make release
226+
- name: Setup Iceberg
227+
uses: ./.github/actions/setup-iceberg-rust-builder
228+
with:
229+
iceberg-version: ${{ matrix.iceberg-version.full }}
230+
- name: Run Iceberg Spark extensions tests (Rust)
231+
run: |
232+
cd apache-iceberg
233+
rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
234+
ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ./gradlew -DsparkVersions=${{ matrix.spark-version.short }} -DscalaVersion=${{ matrix.scala-version }} -DflinkVersions= -DkafkaVersions= \
235+
:iceberg-spark:iceberg-spark-extensions-${{ matrix.spark-version.short }}_${{ matrix.scala-version }}:test \
236+
-Pquick=true -x javadoc
237+
238+
iceberg-spark-runtime-rust:
239+
if: contains(github.event.pull_request.title, '[iceberg]')
240+
strategy:
241+
matrix:
242+
os: [ubuntu-24.04]
243+
java-version: [11, 17]
244+
iceberg-version: [{short: '1.8', full: '1.8.1'}, {short: '1.9', full: '1.9.1'}, {short: '1.10', full: '1.10.0'}]
245+
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.7'}]
246+
scala-version: ['2.13']
247+
fail-fast: false
248+
name: iceberg-spark-runtime-rust/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
249+
runs-on: ${{ matrix.os }}
250+
container:
251+
image: amd64/rust
252+
env:
253+
SPARK_LOCAL_IP: localhost
254+
steps:
255+
- uses: actions/checkout@v5
256+
- name: Setup Rust & Java toolchain
257+
uses: ./.github/actions/setup-builder
258+
with:
259+
rust-version: ${{env.RUST_VERSION}}
260+
jdk-version: ${{ matrix.java-version }}
261+
- name: Build Comet
262+
shell: bash
263+
run: |
264+
PROFILES="-Pspark-${{matrix.spark-version.short}} -Pscala-${{matrix.scala-version}}" make release
265+
- name: Setup Iceberg
266+
uses: ./.github/actions/setup-iceberg-rust-builder
267+
with:
268+
iceberg-version: ${{ matrix.iceberg-version.full }}
269+
- name: Run Iceberg Spark runtime tests (Rust)
270+
run: |
271+
cd apache-iceberg
272+
rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
273+
ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ./gradlew -DsparkVersions=${{ matrix.spark-version.short }} -DscalaVersion=${{ matrix.scala-version }} -DflinkVersions= -DkafkaVersions= \
274+
:iceberg-spark:iceberg-spark-runtime-${{ matrix.spark-version.short }}_${{ matrix.scala-version }}:integrationTest \
275+
-Pquick=true -x javadoc

.github/workflows/pr_build_linux.yml

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ jobs:
103103
value: |
104104
org.apache.comet.CometFuzzTestSuite
105105
org.apache.comet.CometFuzzAggregateSuite
106+
org.apache.comet.CometFuzzIcebergSuite
106107
org.apache.comet.CometFuzzMathSuite
107108
org.apache.comet.DataGeneratorSuite
108109
- name: "shuffle"
@@ -124,35 +125,41 @@ jobs:
124125
org.apache.spark.sql.comet.ParquetDatetimeRebaseV2Suite
125126
org.apache.spark.sql.comet.ParquetEncryptionITCase
126127
org.apache.comet.exec.CometNativeReaderSuite
128+
org.apache.comet.CometIcebergNativeSuite
127129
- name: "exec"
128130
value: |
129131
org.apache.comet.exec.CometAggregateSuite
130132
org.apache.comet.exec.CometExec3_4PlusSuite
131133
org.apache.comet.exec.CometExecSuite
132134
org.apache.comet.exec.CometWindowExecSuite
133135
org.apache.comet.exec.CometJoinSuite
134-
org.apache.comet.CometArrayExpressionSuite
135-
org.apache.comet.CometCastSuite
136-
org.apache.comet.CometExpressionSuite
137-
org.apache.comet.CometExpressionCoverageSuite
138-
org.apache.comet.CometMathExpressionSuite
139136
org.apache.comet.CometNativeSuite
140137
org.apache.comet.CometSparkSessionExtensionsSuite
141-
org.apache.comet.CometStringExpressionSuite
142138
org.apache.spark.CometPluginsSuite
143139
org.apache.spark.CometPluginsDefaultSuite
144140
org.apache.spark.CometPluginsNonOverrideSuite
145141
org.apache.spark.CometPluginsUnifiedModeOverrideSuite
146-
org.apache.comet.CometTemporalExpressionSuite
147142
org.apache.spark.sql.CometTPCDSQuerySuite
148143
org.apache.spark.sql.CometTPCDSQueryTestSuite
149144
org.apache.spark.sql.CometTPCHQuerySuite
150145
org.apache.spark.sql.comet.CometTPCDSV1_4_PlanStabilitySuite
151146
org.apache.spark.sql.comet.CometTPCDSV2_7_PlanStabilitySuite
152147
org.apache.spark.sql.comet.CometTaskMetricsSuite
148+
org.apache.comet.objectstore.NativeConfigSuite
149+
- name: "expressions"
150+
value: |
151+
org.apache.comet.CometExpressionSuite
152+
org.apache.comet.CometExpressionCoverageSuite
153+
org.apache.comet.CometTemporalExpressionSuite
154+
org.apache.comet.CometArrayExpressionSuite
155+
org.apache.comet.CometCastSuite
156+
org.apache.comet.CometMathExpressionSuite
157+
org.apache.comet.CometStringExpressionSuite
153158
org.apache.comet.CometBitwiseExpressionSuite
154159
org.apache.comet.CometMapExpressionSuite
155-
org.apache.comet.objectstore.NativeConfigSuite
160+
org.apache.comet.expressions.conditional.CometIfSuite
161+
org.apache.comet.expressions.conditional.CometCoalesceSuite
162+
org.apache.comet.expressions.conditional.CometCaseWhenSuite
156163
- name: "sql"
157164
value: |
158165
org.apache.spark.sql.CometToPrettyStringSuite

.github/workflows/pr_build_macos.yml

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ jobs:
6868
value: |
6969
org.apache.comet.CometFuzzTestSuite
7070
org.apache.comet.CometFuzzAggregateSuite
71+
org.apache.comet.CometFuzzIcebergSuite
7172
org.apache.comet.CometFuzzMathSuite
7273
org.apache.comet.DataGeneratorSuite
7374
- name: "shuffle"
@@ -89,35 +90,41 @@ jobs:
8990
org.apache.spark.sql.comet.ParquetDatetimeRebaseV2Suite
9091
org.apache.spark.sql.comet.ParquetEncryptionITCase
9192
org.apache.comet.exec.CometNativeReaderSuite
93+
org.apache.comet.CometIcebergNativeSuite
9294
- name: "exec"
9395
value: |
9496
org.apache.comet.exec.CometAggregateSuite
9597
org.apache.comet.exec.CometExec3_4PlusSuite
9698
org.apache.comet.exec.CometExecSuite
9799
org.apache.comet.exec.CometWindowExecSuite
98100
org.apache.comet.exec.CometJoinSuite
99-
org.apache.comet.CometArrayExpressionSuite
100-
org.apache.comet.CometCastSuite
101-
org.apache.comet.CometExpressionSuite
102-
org.apache.comet.CometExpressionCoverageSuite
103-
org.apache.comet.CometMathExpressionSuite
104101
org.apache.comet.CometNativeSuite
105102
org.apache.comet.CometSparkSessionExtensionsSuite
106-
org.apache.comet.CometStringExpressionSuite
107103
org.apache.spark.CometPluginsSuite
108104
org.apache.spark.CometPluginsDefaultSuite
109105
org.apache.spark.CometPluginsNonOverrideSuite
110106
org.apache.spark.CometPluginsUnifiedModeOverrideSuite
111-
org.apache.comet.CometTemporalExpressionSuite
112107
org.apache.spark.sql.CometTPCDSQuerySuite
113108
org.apache.spark.sql.CometTPCDSQueryTestSuite
114109
org.apache.spark.sql.CometTPCHQuerySuite
115110
org.apache.spark.sql.comet.CometTPCDSV1_4_PlanStabilitySuite
116111
org.apache.spark.sql.comet.CometTPCDSV2_7_PlanStabilitySuite
117112
org.apache.spark.sql.comet.CometTaskMetricsSuite
113+
org.apache.comet.objectstore.NativeConfigSuite
114+
- name: "expressions"
115+
value: |
116+
org.apache.comet.CometExpressionSuite
117+
org.apache.comet.CometExpressionCoverageSuite
118+
org.apache.comet.CometTemporalExpressionSuite
119+
org.apache.comet.CometArrayExpressionSuite
120+
org.apache.comet.CometCastSuite
121+
org.apache.comet.CometMathExpressionSuite
122+
org.apache.comet.CometStringExpressionSuite
118123
org.apache.comet.CometBitwiseExpressionSuite
119124
org.apache.comet.CometMapExpressionSuite
120-
org.apache.comet.objectstore.NativeConfigSuite
125+
org.apache.comet.expressions.conditional.CometIfSuite
126+
org.apache.comet.expressions.conditional.CometCoalesceSuite
127+
org.apache.comet.expressions.conditional.CometCaseWhenSuite
121128
- name: "sql"
122129
value: |
123130
org.apache.spark.sql.CometToPrettyStringSuite

common/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ under the License.
2626
<parent>
2727
<groupId>org.apache.datafusion</groupId>
2828
<artifactId>comet-parent-spark${spark.version.short}_${scala.binary.version}</artifactId>
29-
<version>0.12.0-SNAPSHOT</version>
29+
<version>0.13.0-SNAPSHOT</version>
3030
<relativePath>../pom.xml</relativePath>
3131
</parent>
3232

common/src/main/java/org/apache/comet/vector/CometListVector.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ public CometListVector(
4545

4646
@Override
4747
public ColumnarArray getArray(int i) {
48+
if (isNullAt(i)) return null;
4849
int start = listVector.getOffsetBuffer().getInt(i * ListVector.OFFSET_WIDTH);
4950
int end = listVector.getOffsetBuffer().getInt((i + 1) * ListVector.OFFSET_WIDTH);
5051

common/src/main/java/org/apache/comet/vector/CometMapVector.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ public CometMapVector(
6565

6666
@Override
6767
public ColumnarMap getMap(int i) {
68+
if (isNullAt(i)) return null;
6869
int start = mapVector.getOffsetBuffer().getInt(i * MapVector.OFFSET_WIDTH);
6970
int end = mapVector.getOffsetBuffer().getInt((i + 1) * MapVector.OFFSET_WIDTH);
7071

common/src/main/java/org/apache/comet/vector/CometPlainVector.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ public double getDouble(int rowId) {
123123

124124
@Override
125125
public UTF8String getUTF8String(int rowId) {
126+
if (isNullAt(rowId)) return null;
126127
if (!isBaseFixedWidthVector) {
127128
BaseVariableWidthVector varWidthVector = (BaseVariableWidthVector) valueVector;
128129
long offsetBufferAddress = varWidthVector.getOffsetBuffer().memoryAddress();
@@ -147,6 +148,7 @@ public UTF8String getUTF8String(int rowId) {
147148

148149
@Override
149150
public byte[] getBinary(int rowId) {
151+
if (isNullAt(rowId)) return null;
150152
int offset;
151153
int length;
152154
if (valueVector instanceof BaseVariableWidthVector) {

common/src/main/java/org/apache/comet/vector/CometVector.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ public boolean isFixedLength() {
8585

8686
@Override
8787
public Decimal getDecimal(int i, int precision, int scale) {
88+
if (isNullAt(i)) return null;
8889
if (!useDecimal128 && precision <= Decimal.MAX_INT_DIGITS() && type instanceof IntegerType) {
8990
return createDecimal(getInt(i), precision, scale);
9091
} else if (precision <= Decimal.MAX_LONG_DIGITS()) {

common/src/main/scala/org/apache/comet/CometConf.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,16 @@ object CometConf extends ShimCometConf {
122122
Set(SCAN_NATIVE_COMET, SCAN_NATIVE_DATAFUSION, SCAN_NATIVE_ICEBERG_COMPAT, SCAN_AUTO))
123123
.createWithEnvVarOrDefault("COMET_PARQUET_SCAN_IMPL", SCAN_AUTO)
124124

125+
val COMET_ICEBERG_NATIVE_ENABLED: ConfigEntry[Boolean] =
126+
conf("spark.comet.scan.icebergNative.enabled")
127+
.category(CATEGORY_SCAN)
128+
.doc(
129+
"Whether to enable native Iceberg table scan using iceberg-rust. When enabled, " +
130+
"Iceberg tables are read directly through native execution, bypassing Spark's " +
131+
"DataSource V2 API for better performance.")
132+
.booleanConf
133+
.createWithDefault(false)
134+
125135
val COMET_RESPECT_PARQUET_FILTER_PUSHDOWN: ConfigEntry[Boolean] =
126136
conf("spark.comet.parquet.respectFilterPushdown")
127137
.category(CATEGORY_PARQUET)

0 commit comments

Comments
 (0)