Commit c471113

Merge branch 'master' into proto-changes
2 parents: a9b7e11 + 0c9af99

2,208 files changed: 91,647 additions, 44,757 deletions


.github/workflows/benchmark.yml

Lines changed: 28 additions & 2 deletions
@@ -50,6 +50,11 @@ on:
         description: 'Number of job splits'
         required: true
         default: '1'
+      create-commit:
+        type: boolean
+        description: 'Commit the benchmark results to the current branch'
+        required: true
+        default: false

 jobs:
   matrix-gen:
@@ -195,10 +200,31 @@ jobs:
           # To keep the directory structure and file permissions, tar them
           # See also https://github.com/actions/upload-artifact#maintaining-file-permissions-and-case-sensitive-files
           echo "Preparing the benchmark results:"
-          tar -cvf benchmark-results-${{ inputs.jdk }}-${{ inputs.scala }}.tar `git diff --name-only` `git ls-files --others --exclude=tpcds-sf-1 --exclude=tpcds-sf-1-text --exclude-standard`
+          tar -cvf target/benchmark-results-${{ inputs.jdk }}-${{ inputs.scala }}.tar `git diff --name-only` `git ls-files --others --exclude=tpcds-sf-1 --exclude=tpcds-sf-1-text --exclude-standard`
+      - name: Create a pull request with the results
+        if: ${{ inputs.create-commit && success() }}
+        run: |
+          git config --local user.name "${{ github.actor }}"
+          git config --local user.email "${{ github.event.pusher.email || format('{0}@users.noreply.github.com', github.actor) }}"
+          git add -A
+          git commit -m "Benchmark results for ${{ inputs.class }} (JDK ${{ inputs.jdk }}, Scala ${{ inputs.scala }}, split ${{ matrix.split }} of ${{ inputs.num-splits }})"
+          for i in {1..5}; do
+            echo "Attempt $i to push..."
+            git fetch origin ${{ github.ref_name }}
+            git rebase origin/${{ github.ref_name }}
+            if git push origin ${{ github.ref_name }}:${{ github.ref_name }}; then
+              echo "Push successful."
+              exit 0
+            else
+              echo "Push failed, retrying in 3 seconds..."
+              sleep 3
+            fi
+          done
+          echo "Error: Failed to push after 5 attempts."
+          exit 1
       - name: Upload benchmark results
         uses: actions/upload-artifact@v4
         with:
           name: benchmark-results-${{ inputs.jdk }}-${{ inputs.scala }}-${{ matrix.split }}
-          path: benchmark-results-${{ inputs.jdk }}-${{ inputs.scala }}.tar
+          path: target/benchmark-results-${{ inputs.jdk }}-${{ inputs.scala }}.tar
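
The new push step races against the other matrix splits, which commit results to the same branch, so it rebases onto the remote head before every attempt and retries up to five times. A minimal standalone sketch of the same retry pattern (the branch name and attempt count are illustrative, not part of the commit):

    #!/usr/bin/env bash
    # Retry-push: rebase onto the remote head before each attempt so that
    # concurrent writers serialize instead of failing permanently.
    branch="master"   # illustrative; the workflow uses github.ref_name
    for attempt in {1..5}; do
      git fetch origin "$branch"
      git rebase "origin/$branch" || exit 1   # a real conflict should fail the job
      if git push origin "HEAD:$branch"; then
        exit 0
      fi
      echo "Push failed (attempt $attempt), retrying in 3 seconds..."
      sleep 3
    done
    echo "Error: failed to push after 5 attempts." >&2
    exit 1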

.github/workflows/build_and_test.yml

Lines changed: 28 additions & 12 deletions
@@ -112,7 +112,7 @@ jobs:
             ui=false
             docs=false
           fi
-          build=`./dev/is-changed.py -m "core,unsafe,kvstore,avro,utils,network-common,network-shuffle,repl,launcher,examples,sketch,variant,api,catalyst,hive-thriftserver,mllib-local,mllib,graphx,streaming,sql-kafka-0-10,streaming-kafka-0-10,streaming-kinesis-asl,kubernetes,hadoop-cloud,spark-ganglia-lgpl,profiler,protobuf,yarn,connect,sql,hive,pipelines"`
+          build=`./dev/is-changed.py -m "core,unsafe,kvstore,avro,utils,utils-java,network-common,network-shuffle,repl,launcher,examples,sketch,variant,api,catalyst,hive-thriftserver,mllib-local,mllib,graphx,streaming,sql-kafka-0-10,streaming-kafka-0-10,streaming-kinesis-asl,kubernetes,hadoop-cloud,spark-ganglia-lgpl,profiler,protobuf,yarn,connect,sql,hive,pipelines"`
           precondition="
             {
               \"build\": \"$build\",
@@ -122,7 +122,8 @@ jobs:
               \"tpcds-1g\": \"$tpcds\",
               \"docker-integration-tests\": \"$docker\",
               \"lint\" : \"true\",
-              \"java25\" : \"true\",
+              \"java17\" : \"$build\",
+              \"java25\" : \"$build\",
               \"docs\" : \"$docs\",
               \"yarn\" : \"$yarn\",
               \"k8s-integration-tests\" : \"$kubernetes\",
@@ -241,7 +242,7 @@ jobs:
       # Note that the modules below are from sparktestsupport/modules.py.
       modules:
         - >-
-          core, unsafe, kvstore, avro, utils,
+          core, unsafe, kvstore, avro, utils, utils-java,
           network-common, network-shuffle, repl, launcher,
           examples, sketch, variant
         - >-
@@ -361,7 +362,7 @@ jobs:
       - name: Install Python packages (Python 3.11)
         if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect') || contains(matrix.modules, 'yarn')
         run: |
-          python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1'
+          python3.11 -m pip install 'numpy>=1.22' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1'
           python3.11 -m pip list
       # Run the tests.
       - name: Run tests
@@ -520,13 +521,9 @@ jobs:
         - >-
           pyspark-pandas-slow
         - >-
-          pyspark-pandas-connect-part0
+          pyspark-pandas-connect-part0, pyspark-pandas-connect-part3
         - >-
-          pyspark-pandas-connect-part1
-        - >-
-          pyspark-pandas-connect-part2
-        - >-
-          pyspark-pandas-connect-part3
+          pyspark-pandas-connect-part1, pyspark-pandas-connect-part2
       exclude:
         # Always run if pyspark == 'true', even infra-image is skip (such as non-master job)
         # In practice, the build will run in individual PR, but not against the individual commit
@@ -606,8 +603,9 @@ jobs:
         run: |
           for py in $(echo $PYTHON_TO_TEST | tr "," "\n")
           do
-            echo $py
+            $py --version
             $py -m pip list
+            echo ""
           done
       - name: Install Conda for pip packaging test
         if: contains(matrix.modules, 'pyspark-errors')
@@ -920,6 +918,24 @@ jobs:
       - name: R linter
         run: ./dev/lint-r

+  java17:
+    needs: [precondition]
+    if: fromJson(needs.precondition.outputs.required).java17 == 'true'
+    name: Java 17 build with Maven
+    runs-on: ubuntu-latest
+    timeout-minutes: 120
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-java@v4
+        with:
+          distribution: zulu
+          java-version: 17
+      - name: Build with Maven
+        run: |
+          export MAVEN_OPTS="-Xss64m -Xmx4g -Xms4g -XX:ReservedCodeCacheSize=128m -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
+          export MAVEN_CLI_OPTS="--no-transfer-progress"
+          ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl clean install
+
   java25:
     needs: [precondition]
     if: fromJson(needs.precondition.outputs.required).java25 == 'true'
@@ -1017,7 +1033,7 @@ jobs:
           # Should unpin 'sphinxcontrib-*' after upgrading sphinx>5
           python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
           python3.9 -m pip install ipython_genutils # See SPARK-38517
-          python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly<6.0.0'
+          python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.22' pyarrow pandas 'plotly<6.0.0'
           python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421
       - name: List Python packages for branch-3.5 and branch-4.0
         if: inputs.branch == 'branch-3.5' || inputs.branch == 'branch-4.0'
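
The new java17 job reuses the existing gating pattern: the precondition job computes a JSON map of flags and publishes it as a step output, and each downstream job tests its own key with fromJson. A hedged sketch of that wiring, with the module list and map condensed for illustration (the real step builds a much larger map):

    # dev/is-changed.py prints "true" or "false" depending on whether any of
    # the listed modules were touched; both JDK build jobs share the result.
    build=$(./dev/is-changed.py -m "core,utils,utils-java")
    precondition="{\"java17\": \"$build\", \"java25\": \"$build\"}"
    echo "required=$precondition" >> "$GITHUB_OUTPUT"
    # Downstream, a job is skipped unless its gate is "true":
    #   if: fromJson(needs.precondition.outputs.required).java17 == 'true'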

.github/workflows/build_infra_images_cache.yml

Lines changed: 0 additions & 14 deletions
@@ -33,7 +33,6 @@ on:
       - 'dev/spark-test-image/python-minimum/Dockerfile'
       - 'dev/spark-test-image/python-ps-minimum/Dockerfile'
       - 'dev/spark-test-image/pypy-310/Dockerfile'
-      - 'dev/spark-test-image/python-309/Dockerfile'
       - 'dev/spark-test-image/python-310/Dockerfile'
       - 'dev/spark-test-image/python-311/Dockerfile'
       - 'dev/spark-test-image/python-311-classic-only/Dockerfile'
@@ -153,19 +152,6 @@ jobs:
       - name: Image digest (PySpark with PyPy 3.10)
         if: hashFiles('dev/spark-test-image/pypy-310/Dockerfile') != ''
         run: echo ${{ steps.docker_build_pyspark_pypy_310.outputs.digest }}
-      - name: Build and push (PySpark with Python 3.9)
-        if: hashFiles('dev/spark-test-image/python-309/Dockerfile') != ''
-        id: docker_build_pyspark_python_309
-        uses: docker/build-push-action@v6
-        with:
-          context: ./dev/spark-test-image/python-309/
-          push: true
-          tags: ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-309-cache:${{ github.ref_name }}-static
-          cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-309-cache:${{ github.ref_name }}
-          cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-309-cache:${{ github.ref_name }},mode=max
-      - name: Image digest (PySpark with Python 3.9)
-        if: hashFiles('dev/spark-test-image/python-309/Dockerfile') != ''
-        run: echo ${{ steps.docker_build_pyspark_python_309.outputs.digest }}
       - name: Build and push (PySpark with Python 3.10)
         if: hashFiles('dev/spark-test-image/python-310/Dockerfile') != ''
         id: docker_build_pyspark_python_310

.github/workflows/build_maven_java21_arm.yml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ name: "Build / Maven (master, Scala 2.13, Hadoop 3, JDK 21, ARM)"

 on:
   schedule:
-    - cron: '0 15 * * *'
+    - cron: '0 15 */2 * *'
   workflow_dispatch:

 jobs:
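
The cron change halves this ARM Maven build's cadence: */2 in the day-of-month field matches days 1, 3, 5, and so on, so the job now runs at 15:00 UTC roughly every other day instead of daily. For reference:

    # Cron fields: minute  hour  day-of-month  month  day-of-week
    # '0 15 * * *'    -> 15:00 UTC every day (previous schedule)
    # '0 15 */2 * *'  -> 15:00 UTC on days 1, 3, 5, ... of each month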

.github/workflows/build_non_ansi.yml

Lines changed: 1 addition & 0 deletions
@@ -40,6 +40,7 @@ jobs:
         "PYSPARK_IMAGE_TO_TEST": "python-311",
         "PYTHON_TO_TEST": "python3.11",
         "SPARK_ANSI_SQL_MODE": "false",
+        "SPARK_TEST_SPARK_BLOOM_FILTER_SUITE_ENABLED": "true"
       }
     jobs: >-
       {

.github/workflows/build_python_connect.yml

Lines changed: 1 addition & 1 deletion
@@ -72,7 +72,7 @@ jobs:
           python packaging/client/setup.py sdist
           cd dist
           pip install pyspark*client-*.tar.gz
-          pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' 'six==1.16.0' 'pandas==2.3.0' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' 'torch<2.6.0' torchvision torcheval deepspeed unittest-xml-reporting
+          pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' 'six==1.16.0' 'pandas==2.3.2' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' 'torch<2.6.0' torchvision torcheval deepspeed unittest-xml-reporting
       - name: List Python packages
         run: python -m pip list
       - name: Run tests (local)

.github/workflows/build_python_connect35.yml

Lines changed: 1 addition & 1 deletion
@@ -68,7 +68,7 @@ jobs:
           ./build/sbt -Phive Test/package
       - name: Install Python dependencies
         run: |
-          pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'pandas<=2.0.3' scipy unittest-xml-reporting 'plotly<6.0.0' 'mlflow>=2.3.1' coverage 'matplotlib==3.7.2' openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*'
+          pip install 'numpy==1.25.1' 'pyarrow>=18.0.0' 'pandas<=2.0.3' scipy unittest-xml-reporting 'plotly<6.0.0' 'mlflow>=2.3.1' coverage 'matplotlib==3.7.2' openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*'

           # Add Python deps for Spark Connect.
           pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3'
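
The pyarrow requirement loosens from an exact pin to a floor. With PEP 440 specifiers, == demands exactly one release while >= lets pip resolve anything newer:

    pip install 'pyarrow==12.0.1'   # exactly 12.0.1, nothing else
    pip install 'pyarrow>=18.0.0'   # 18.0.0 or any later release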

.github/workflows/maven_test.yml

Lines changed: 6 additions & 2 deletions
@@ -67,7 +67,7 @@ jobs:
           - hive2.3
         modules:
           - >-
-            core,launcher,common#unsafe,common#kvstore,common#network-common,common#network-shuffle,common#sketch,common#utils,common#variant
+            core,launcher,common#unsafe,common#kvstore,common#network-common,common#network-shuffle,common#sketch,common#utils,common#utils-java,common#variant
           - >-
             graphx,streaming,hadoop-cloud
           - >-
@@ -181,7 +181,7 @@ jobs:
       - name: Install Python packages (Python 3.11)
         if: contains(matrix.modules, 'resource-managers#yarn') || (contains(matrix.modules, 'sql#core')) || contains(matrix.modules, 'connect')
         run: |
-          python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1'
+          python3.11 -m pip install 'numpy>=1.22' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1'
           python3.11 -m pip list
       # Run the tests.
       - name: Run tests
@@ -214,6 +214,10 @@ jobs:
             # SPARK-52441: Remove sql/pipelines from TEST_MODULES for branch-4.0, this branch can be deleted after the EOL of branch-4.0.
             TEST_MODULES=${TEST_MODULES/,sql\/pipelines/}
             ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pjvm-profiler -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae
+          elif [[ "$MODULES_TO_TEST" == *"common#utils-java"* && "$INPUT_BRANCH" == "branch-4.0" ]]; then
+            # SPARK-53138: Remove common/utils-java from TEST_MODULES for branch-4.0, this branch can be deleted after the EOL of branch-4.0.
+            TEST_MODULES=${TEST_MODULES/,common\/utils-java/}
+            ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pjvm-profiler -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae
           else
             ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pjvm-profiler -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae
           fi
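
Both branch-4.0 workarounds (SPARK-52441 and the new SPARK-53138 branch) lean on bash's ${var/pattern/replacement} expansion, where an empty replacement deletes the first match and the slash inside the module path is escaped so it is not read as the expansion delimiter. A quick illustration with a made-up module list:

    # ${var/pattern/} deletes the first occurrence of pattern in var.
    TEST_MODULES="core,common/utils,common/utils-java,launcher"
    echo "${TEST_MODULES/,common\/utils-java/}"   # core,common/utils,launcher

    # The same expansion strips the "-ea" suffix from early-access JDKs:
    JAVA_VERSION="25-ea"
    echo "${JAVA_VERSION/-ea}"                    # 25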

.github/workflows/pages.yml

Lines changed: 1 addition & 1 deletion
@@ -61,7 +61,7 @@ jobs:
       - name: Install Python dependencies
         run: |
           pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
-            ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow 'pandas==2.3.0' 'plotly>=4.8' 'docutils<0.18.0' \
+            ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow 'pandas==2.3.2' 'plotly>=4.8' 'docutils<0.18.0' \
             'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \
             'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
             'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'

.github/workflows/python_hosted_runner_test.yml

Lines changed: 1 addition & 1 deletion
@@ -149,7 +149,7 @@ jobs:
         run: |
           python${{matrix.python}} -m pip install --ignore-installed 'blinker>=1.6.2'
           python${{matrix.python}} -m pip install --ignore-installed 'six==1.16.0'
-          python${{matrix.python}} -m pip install numpy 'pyarrow>=19.0.0' 'six==1.16.0' 'pandas==2.3.0' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \
+          python${{matrix.python}} -m pip install numpy 'pyarrow>=21.0.0' 'six==1.16.0' 'pandas==2.3.2' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \
           python${{matrix.python}} -m pip cache purge
       - name: List Python packages
