Skip to content

Commit

Permalink
Merge pull request #1 from TomAugspurger/tom/initial-setup
Browse files Browse the repository at this point in the history
Initial setup for dask-upstream-testing
  • Loading branch information
TomAugspurger authored Jan 14, 2025
2 parents 5f105bc + ff9bfda commit bd8a0a2
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 108 deletions.
110 changes: 35 additions & 75 deletions .github/workflows/cron.yaml
Original file line number Diff line number Diff line change
@@ -1,78 +1,38 @@
on:
# TODO: change to a cron-based schedule once this is working
push:
branches:
- main
# Based off https://github.com/rapidsai/cudf/blob/branch-25.02/.github/workflows/pandas-tests.yaml
# Scheduled workflow that runs the dask / distributed GPU tests.
name: Test dask-upstream

on:
schedule:
# 18:15 UTC daily (cron fields: minute hour day-of-month month day-of-week;
# GitHub evaluates scheduled workflows in UTC).
# We want to run after the nightly pipeline finishes.
# https://github.com/rapidsai/workflows/blob/main/.github/workflows/nightly-pipeline-trigger.yaml is
# currently set to 5:00 UTC and takes ~12 hours, hence the 18:15 start.
- cron: "15 18 * * *"

jobs:
test:
name: "Test dask and distributed"
# TODO: change to appropriate image
runs-on: "linux-amd64-gpu-v100-latest-1"
container:
image: rapidsai/distributed:24.12-cuda11.8.0-devel-ubuntu20.04-py3.12
env:
NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
# Helper job: computes the `date` and `branch` values that the dask-tests job
# reads through `needs.setup.outputs.*`.
setup:
runs-on: ubuntu-latest
outputs:
# Each output is forwarded from the step whose `id` matches (`date`, `branch`).
date: ${{ steps.date.outputs.date }}
branch: ${{ steps.branch.outputs.branch }}
steps:
- name: Checkout ourselves
uses: actions/checkout@v4
with:
path: utils
- name: Checkout dask
uses: actions/checkout@v4
with:
repository: dask/dask
path: dask
- name: Checkout distributed
uses: actions/checkout@v4
with:
repository: dask/distributed
path: distributed
- name: Run
run: |
(cd dask; git rev-parse HEAD;
cd ../distributed; git rev-parse HEAD) | tee commit-hashes.txt
- name: Upload commit hashes
uses: actions/upload-artifact@v4
with:
name: commit-hashes.txt
path: commit-hashes.txt
- name: Setup python
uses: actions/setup-python@v5
with:
python-version: 3.12
- name: Get last artifact URL from last run
id: get_last_id
run: |
pip install requests
VAL=$(python utils/get.py)
echo "${VAL}"
echo "${VAL}" >> $GITHUB_OUTPUT
- name: Download artifact from last run if exists
if: ${{ fromJSON(steps.get_last_id.outputs.INFO).exists }}
continue-on-error: true
uses: actions/download-artifact@v4
with:
name: commit-hashes.txt
path: previous-run
github-token: ${{ secrets.GITHUB_TOKEN }}
run-id: ${{ fromJSON(steps.get_last_id.outputs.INFO).id }}
- name: Check if test run is needed
id: check_run_needed
run: |
ls -l previous-run/
if [ ! -f previous-run/commit-hashes.txt ]; then
echo "No previous run hashes, need to re-run"
echo 'INFO={"rerun": true}' >> $GITHUB_OUTPUT
elif cmp -s commit-hashes.txt previous-run/commit-hashes.txt; then
echo "Previous run hash same as this one, no need to re-run"
echo 'INFO={"rerun": false}' >> $GITHUB_OUTPUT
else
echo "Previous run hash different, need to re-run"
echo 'INFO={"rerun": true}' >> $GITHUB_OUTPUT
fi
- name: Run tests
if: ${{ fromJSON(steps.check_run_needed.outputs.INFO).rerun }}
run: |
echo Running tests
nvidia-smi
- name: Get current date
  id: date
  # Publish today's date (YYYY-MM-DD) as the step output `date`.
  # $GITHUB_OUTPUT expects `<output-name>=<value>` lines; the whole
  # assignment is quoted so the shell sees one balanced word.
  run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"
- name: Get current branch
  id: branch
  # Publish the branch name as the step output `branch`: GITHUB_HEAD_REF when
  # it is set and non-empty, otherwise GITHUB_REF with its leading
  # `refs/heads/` removed. The output file path is quoted to avoid
  # word-splitting.
  run: echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> "$GITHUB_OUTPUT"

dask-tests:
  needs: setup
  # Run the Dask and Distributed unit tests via the reusable RAPIDS workflow.
  secrets: inherit
  # NOTE(review): confirm the reusable workflow file name; the inputs passed
  # below (matrix_filter, build_type, branch, date, sha, script) match
  # wheels-test.yaml.
  uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02
  with:
    # jq filter applied to the test matrix: keep only ARCH == "amd64" entries,
    # group them by CUDA major version, and from each group keep the entry
    # with the highest (Python version, CUDA version) pair. The result is one
    # entry per CUDA major version.
    matrix_filter: 'map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))'
    build_type: nightly
    # `branch` and `date` are computed by the setup job.
    branch: ${{ needs.setup.outputs.branch }}
    date: ${{ needs.setup.outputs.date }}
    sha: ${{ github.sha }}
    # Script in this repository that installs dependencies and runs the tests.
    script: scripts/run.sh
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
dask
distributed
18 changes: 18 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright (c) 2025, NVIDIA CORPORATION.

# Hooks run by pre-commit; only generic whitespace hygiene is configured.
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
# Removes trailing whitespace at the end of lines.
- id: trailing-whitespace
# `exclude` is a verbose-mode ((?x)) regex of paths the hook skips.
# NOTE(review): these paths (cpp/..., python/cudf/...) look carried over
# from the cudf repository — confirm whether they are needed here.
exclude: |
(?x)^(
^cpp/cmake/thirdparty/patches/.*|
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
)
# Ensures files end with a single trailing newline.
- id: end-of-file-fixer
# NOTE(review): same path list as the hook above; presumably also copied
# from cudf — confirm.
exclude: |
(?x)^(
^cpp/cmake/thirdparty/patches/.*|
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
)
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1 +1,9 @@
# Dask Upstream Testing

This repository contains the scripts to run Dask's `gpu`-marked tests on a schedule.

## Version Policy

The primary goal here is to quickly identify breakages in tests defined in `dask/dask` and `dask/distributed`, so we'll use the latest `main` from each of those.

When breakages occur, they'll generally be fixed either in Dask or in the nightly versions of the downstream packages (rapids, cupy, numba, etc.). For that reason, we install the nightly (rather than `latest`) version of the downstream packages.
33 changes: 0 additions & 33 deletions get.py

This file was deleted.

36 changes: 36 additions & 0 deletions scripts/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.

# Installs the nightly RAPIDS wheels, installs dask and distributed in
# editable mode from git checkouts at DASK_VERSION, then runs
# scripts/test.sh. Paths are relative, so run this from the repository root.
#
# Environment:
#   RAPIDS_CUDA_VERSION  optional; CUDA version used to pick the wheel
#                        suffix (defaults to 12).

# Install
set -euo pipefail

# Wheel names carry only the CUDA major version (e.g. "cudf-cu12"), so strip
# any minor/patch component: "12" and "12.5.1" both yield "cu12".
# NOTE(review): assumes CI may export a dotted RAPIDS_CUDA_VERSION — confirm.
cuda_version="${RAPIDS_CUDA_VERSION:-12}"
RAPIDS_PY_CUDA_SUFFIX="cu${cuda_version%%.*}"

# TODO: set this to main once dask-cudf is compatible
# DASK_VERSION=main
DASK_VERSION=2024.12.1
# PIP_YES answers pip's confirmation prompts (needed by `pip uninstall`
# below); PIP_PRE allows pre-release (nightly) versions to be selected.
export PIP_YES=true
export PIP_PRE=true

pip install --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple \
    "cudf-${RAPIDS_PY_CUDA_SUFFIX}" \
    "dask-cudf-${RAPIDS_PY_CUDA_SUFFIX}" \
    "ucx-py-${RAPIDS_PY_CUDA_SUFFIX}" \
    "scipy" \
    "dask-cuda"

echo "Installing dask@${DASK_VERSION}"

# Reuse existing checkouts when present; clone otherwise.
if [ ! -d "dask" ]; then
    git clone https://github.com/dask/dask
fi

if [ ! -d "distributed" ]; then
    git clone https://github.com/dask/distributed
fi

# Remove the dask/distributed pulled in as dependencies of the wheels above
# so the editable installs below replace them.
pip uninstall dask distributed

# Each checkout + install runs in its own subshell. With `set -e`, a failure
# in the middle of an `a && b && c` chain does NOT abort the script and would
# leave it in the wrong directory; separate commands in a subshell fail fast
# and leave the working directory of this script untouched.
(
    cd dask
    git clean -fdx
    git checkout "${DASK_VERSION}"
    pip install -e ".[test]"
)
(
    cd distributed
    git clean -fdx
    git checkout "${DASK_VERSION}"
    pip install -e .
)

./scripts/test.sh
17 changes: 17 additions & 0 deletions scripts/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES.

# Runs the `gpu`-marked tests of dask and distributed from the ./dask and
# ./distributed checkouts. `set -e` is intentionally not used: a pytest
# failure in the first suite must not prevent the second suite from running.
# Exits 1 if either suite fails.

# Abort if a checkout is missing rather than running pytest in the wrong
# directory.
pushd dask || exit 1
pytest dask -v -m gpu
dask_status=$?
popd || exit 1

pushd distributed || exit 1
pytest distributed -v -m gpu --runslow
distributed_status=$?
popd || exit 1

# Report a single overall failure if either suite returned non-zero.
if [ "$dask_status" -ne 0 ] || [ "$distributed_status" -ne 0 ]; then
    echo "Tests failed"
    exit 1
fi

0 comments on commit bd8a0a2

Please sign in to comment.