From 4cc487cd3620ecb9a6dc02a15929cd9f071f6dc4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 8 Jan 2025 14:46:55 -0800 Subject: [PATCH 1/8] Added runner for upstream dask gpu tests This adds a Cron Job to the github actions that runs the `gpu` marked tests from dask and distributed. The setup here is modeled after https://github.com/rapidsai/cudf/blob/branch-25.02/.github/workflows/pandas-tests.yaml, which is running pandas tests against the nightly versions of cudf. --- .github/workflows/cron.yaml | 99 ++++++++++--------------------------- .gitignore | 2 + README.md | 8 +++ get.py | 33 ------------- scripts/run.sh | 35 +++++++++++++ scripts/test | 17 +++++++ 6 files changed, 87 insertions(+), 107 deletions(-) create mode 100644 .gitignore delete mode 100644 get.py create mode 100755 scripts/run.sh create mode 100755 scripts/test diff --git a/.github/workflows/cron.yaml b/.github/workflows/cron.yaml index 04c1d60..d2ce58a 100644 --- a/.github/workflows/cron.yaml +++ b/.github/workflows/cron.yaml @@ -1,78 +1,29 @@ +# Based off https://github.com/rapidsai/cudf/blob/branch-25.02/.github/workflows/pandas-tests.yaml +name: Test dask-upstream + on: - # TODO: change to cron-based schedule one this is working - push: - branches: - - main + workflow_dispatch: + inputs: + branch: + required: true + type: string + date: + required: true + type: string + sha: + required: true + type: string jobs: - test: - name: "Test dask and distributed" - # TODO: change to appropriate image - runs-on: "linux-amd64-gpu-v100-latest-1" - container: - image: rapidsai/distributed:24.12-cuda11.8.0-devel-ubuntu20.04-py3.12 - env: - NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} - steps: - - name: Checkout ourselves - uses: actions/checkout@v4 - with: - path: utils - - name: Checkout dask - uses: actions/checkout@v4 - with: - repository: dask/dask - path: dask - - name: Checkout distributed - uses: actions/checkout@v4 - with: - repository: dask/distributed - path: distributed - 
- name: Run - run: | - (cd dask; git rev-parse HEAD; - cd ../distributed; git rev-parse HEAD) | tee commit-hashes.txt - - name: Upload commit hashes - uses: actions/upload-artifact@v4 - with: - name: commit-hashes.txt - path: commit-hashes.txt - - name: Setup python - uses: actions/setup-python@v5 - with: - python-version: 3.12 - - name: Get last artifact URL from last run - id: get_last_id - run: | - pip install requests - VAL=$(python utils/get.py) - echo "${VAL}" - echo "${VAL}" >> $GITHUB_OUTPUT - - name: Download artifact from last run if exists - if: ${{ fromJSON(steps.get_last_id.outputs.INFO).exists }} - continue-on-error: true - uses: actions/download-artifact@v4 + dask-tests: + # run the Dask and Distributed unit tests + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: - name: commit-hashes.txt - path: previous-run - github-token: ${{ secrets.GITHUB_TOKEN }} - run-id: ${{ fromJSON(steps.get_last_id.outputs.INFO).id }} - - name: Check if test run is needed - id: check_run_needed - run: | - ls -l previous-run/ - if [ ! -f previous-run/commit-hashes.txt ]; then - echo "No previous run hashes, need to re-run" - echo 'INFO={"rerun": true}' >> $GITHUB_OUTPUT - elif cmp -s commit-hashes.txt previous-run/commit-hashes.txt; then - echo "Previous run hash same as this one, no need to re-run" - echo 'INFO={"rerun": false}' >> $GITHUB_OUTPUT - else - echo "Previous run hash different, need to re-run" - echo 'INFO={"rerun": true}' >> $GITHUB_OUTPUT - fi - - name: Run tests - if: ${{ fromJSON(steps.check_run_needed.outputs.INFO).rerun }} - run: | - echo Running tests - nvidia-smi + # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
+ matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + script: scripts/run.sh \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f729cf7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +dask +distributed \ No newline at end of file diff --git a/README.md b/README.md index fd0021a..9d0646c 100644 --- a/README.md +++ b/README.md @@ -1 +1,9 @@ # Dask Upstream Testing + +This repository contains the scripts to run Dask's `gpu`-marked tests on a schedule. + +## Version Policy + +The primary goal here is to quickly identify breakages in tests defined in `dask/dask` and `dask/distributed`, so we'll use the latest `main` from each of those. + +When breakages occur, they'll generally be fixed either in Dask or in the the nightly versions of the downstream packages (rapids, cupy, numba, etc.). And so we install the nightly (rather than `latest`) version of the downstream packages. 
\ No newline at end of file diff --git a/get.py b/get.py deleted file mode 100644 index bcaf266..0000000 --- a/get.py +++ /dev/null @@ -1,33 +0,0 @@ -import requests -import json - - -def previous_run_id() -> str | None: - req = requests.get( - "https://api.github.com/repos/rapidsai/dask-upstream-testing/actions/artifacts", - headers={ - "Accept": "application/vnd.github+json", - "X-GitHub-Api-Version": "2022-11-28", - }, - params={"name": "commit-hashes.txt", "page": 1, "per_page": 1}, - ) - if req.status_code != 200: - return None - artifacts = req.json()["artifacts"] - try: - (artifact,) = artifacts - run_id = artifact["workflow_run"]["id"] - return run_id - except ValueError: - # Didn't get exactly one artifact, assume we must rebuild - return None - - -if __name__ == "__main__": - run_id = previous_run_id() - if run_id is not None: - info = json.dumps({"id": run_id, "exists": True}) - print(f"INFO={info}") - else: - info = json.dumps({"exists": False}) - print(f"INFO={info}") diff --git a/scripts/run.sh b/scripts/run.sh new file mode 100755 index 0000000..04af272 --- /dev/null +++ b/scripts/run.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. + +# Install +set -euo pipefail + +# RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +RAPIDS_PY_CUDA_SUFFIX=12 +# TODO: set this to main once dask-cudf is compatible +# DASK_VERSION=main +DASK_VERSION=2024.12.1 +export PIP_YES=true +export PIP_PRE=true + +# Should this use nightly wheels or rapids-download-wheels-from-s3? + +pip install --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple \ + "cudf-cu12" \ + "dask-cudf-cu12" + +echo "Installing dask@{DASK_VERSION}" + +if [ ! -d "dask" ]; then + git clone https://github.com/dask/dask +fi + +if [ ! 
-d "distributed" ]; then + git clone https://github.com/dask/distributed +fi + +pip uninstall dask distributed +cd dask && git clean -fdx && git checkout $DASK_VERSION && pip install -e .[test] && cd .. +cd distributed && git clean -fdx && git checkout $DASK_VERSION && pip install -e . && cd .. + +./scripts/test \ No newline at end of file diff --git a/scripts/test b/scripts/test new file mode 100755 index 0000000..0789e75 --- /dev/null +++ b/scripts/test @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. + +pushd dask +pytest dask -v -m gpu +dask_status=$? +popd + +pushd distributed +pytest distributed -v -m gpu +distributed_status=$? +popd + +if [ $dask_status -ne 0 ] || [ $distributed_status -ne 0 ]; then + echo "Tests faild" + exit 1 +fi From fcaaf08f5b399b26294f7aaaabb5d4854341632d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 9 Jan 2025 07:58:28 -0800 Subject: [PATCH 2/8] Narrow the skips --- scripts/run.sh | 5 ++++- scripts/test | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/run.sh b/scripts/run.sh index 04af272..9afee96 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -16,7 +16,10 @@ export PIP_PRE=true pip install --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple \ "cudf-cu12" \ - "dask-cudf-cu12" + "dask-cudf-cu12" \ + "scipy" \ + "ucx" \ + "dask-cuda" echo "Installing dask@{DASK_VERSION}" diff --git a/scripts/test b/scripts/test index 0789e75..07c6d67 100755 --- a/scripts/test +++ b/scripts/test @@ -7,7 +7,7 @@ dask_status=$? popd pushd distributed -pytest distributed -v -m gpu +pytest distributed -v -m gpu --runslow distributed_status=$? 
popd From 15a4ff8d1653a1917ec8c2e44eb43f958bd2a421 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 9 Jan 2025 09:56:23 -0800 Subject: [PATCH 3/8] added pre-commit --- .pre-commit-config.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..d261b54 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,18 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + exclude: | + (?x)^( + ^cpp/cmake/thirdparty/patches/.*| + ^python/cudf/cudf/tests/data/subword_tokenizer_data/.* + ) + - id: end-of-file-fixer + exclude: | + (?x)^( + ^cpp/cmake/thirdparty/patches/.*| + ^python/cudf/cudf/tests/data/subword_tokenizer_data/.* + ) From bec8e96cdbe23c0fce233bda87d9afbbc66e89dd Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 9 Jan 2025 09:57:30 -0800 Subject: [PATCH 4/8] rename --- scripts/{test => test.sh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/{test => test.sh} (100%) diff --git a/scripts/test b/scripts/test.sh similarity index 100% rename from scripts/test rename to scripts/test.sh From 0edfa21db6bc57bdecb69fdca0e95598b86b6958 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 9 Jan 2025 10:11:14 -0800 Subject: [PATCH 5/8] Fixup --- scripts/run.sh | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/scripts/run.sh b/scripts/run.sh index 9afee96..c262c56 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -4,21 +4,19 @@ # Install set -euo pipefail -# RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_CUDA_SUFFIX=12 +RAPIDS_PY_CUDA_SUFFIX="cu${RAPIDS_CUDA_VERSION:-12}" + # TODO: set this to main once dask-cudf is compatible # DASK_VERSION=main DASK_VERSION=2024.12.1 export PIP_YES=true export PIP_PRE=true -# 
Should this use nightly wheels or rapids-download-wheels-from-s3? - pip install --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple \ - "cudf-cu12" \ - "dask-cudf-cu12" \ + "cudf-${RAPIDS_PY_CUDA_SUFFIX}" \ + "dask-cudf-${RAPIDS_PY_CUDA_SUFFIX}" \ + "ucx-py-${RAPIDS_PY_CUDA_SUFFIX}" \ "scipy" \ - "ucx" \ "dask-cuda" echo "Installing dask@{DASK_VERSION}" @@ -35,4 +33,4 @@ pip uninstall dask distributed cd dask && git clean -fdx && git checkout $DASK_VERSION && pip install -e .[test] && cd .. cd distributed && git clean -fdx && git checkout $DASK_VERSION && pip install -e . && cd .. -./scripts/test \ No newline at end of file +./scripts/test.sh \ No newline at end of file From 564f155213a3025abbb145ef8b1beca4436c05f4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 9 Jan 2025 10:13:28 -0800 Subject: [PATCH 6/8] Added pre-commit --- .github/workflows/cron.yaml | 2 +- .gitignore | 2 +- README.md | 2 +- scripts/run.sh | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cron.yaml b/.github/workflows/cron.yaml index d2ce58a..6000b6d 100644 --- a/.github/workflows/cron.yaml +++ b/.github/workflows/cron.yaml @@ -26,4 +26,4 @@ jobs: branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} - script: scripts/run.sh \ No newline at end of file + script: scripts/run.sh diff --git a/.gitignore b/.gitignore index f729cf7..76d16e6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ dask -distributed \ No newline at end of file +distributed diff --git a/README.md b/README.md index 9d0646c..c635c6e 100644 --- a/README.md +++ b/README.md @@ -6,4 +6,4 @@ This repository contains the scripts to run Dask's `gpu`-marked tests on a sched The primary goal here is to quickly identify breakages in tests defined in `dask/dask` and `dask/distributed`, so we'll use the latest `main` from each of those. 
-When breakages occur, they'll generally be fixed either in Dask or in the the nightly versions of the downstream packages (rapids, cupy, numba, etc.). And so we install the nightly (rather than `latest`) version of the downstream packages. \ No newline at end of file +When breakages occur, they'll generally be fixed either in Dask or in the the nightly versions of the downstream packages (rapids, cupy, numba, etc.). And so we install the nightly (rather than `latest`) version of the downstream packages. diff --git a/scripts/run.sh b/scripts/run.sh index c262c56..5370efe 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -33,4 +33,4 @@ pip uninstall dask distributed cd dask && git clean -fdx && git checkout $DASK_VERSION && pip install -e .[test] && cd .. cd distributed && git clean -fdx && git checkout $DASK_VERSION && pip install -e . && cd .. -./scripts/test.sh \ No newline at end of file +./scripts/test.sh From 11f4ce19de6dd6313e8512e6f3197658336e9cdf Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 10 Jan 2025 09:02:29 -0800 Subject: [PATCH 7/8] Define the schedule here --- .github/workflows/cron.yaml | 51 ++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/.github/workflows/cron.yaml b/.github/workflows/cron.yaml index 6000b6d..7820a22 100644 --- a/.github/workflows/cron.yaml +++ b/.github/workflows/cron.yaml @@ -2,28 +2,33 @@ name: Test dask-upstream on: - workflow_dispatch: - inputs: - branch: - required: true - type: string - date: - required: true - type: string - sha: - required: true - type: string - + schedule: + # 18:15 UTC daily. + # We want to run after the nightly pipeline finishes. 
+ # https://github.com/rapidsai/workflows/blob/main/.github/workflows/nightly-pipeline-trigger.yaml is + # currently set to 5:00 UTC and takes ~12 hours + - cron: "15 18 * * *" + jobs: + setup: + runs-on: ubuntu-latest + outputs: + date: ${{ steps.date.outputs.date }} + steps: + - name: Get current date + id: date + run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT" + dask-tests: - # run the Dask and Distributed unit tests - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 - with: - # This selects "ARCH=amd64 + the latest supported Python + CUDA". - matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) - build_type: nightly - branch: ${{ inputs.branch }} - date: ${{ inputs.date }} - sha: ${{ inputs.sha }} - script: scripts/run.sh + needs: setup + # run the Dask and Distributed unit tests + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 + with: + # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
+ matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) + build_type: nightly + branch: main + date: ${{ needs.setup.outputs.date }} + sha: ${{ github.sha }} + script: scripts/run.sh From ff9bfdab3b8ca9f74589b5904e77a8ecd5fc1273 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 10 Jan 2025 09:04:13 -0800 Subject: [PATCH 8/8] fixed branch --- .github/workflows/cron.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cron.yaml b/.github/workflows/cron.yaml index 7820a22..3245aa2 100644 --- a/.github/workflows/cron.yaml +++ b/.github/workflows/cron.yaml @@ -14,10 +14,14 @@ jobs: runs-on: ubuntu-latest outputs: date: ${{ steps.date.outputs.date }} + branch: ${{ steps.branch.outputs.branch }} steps: - name: Get current date id: date run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT" + - name: Get current branch + id: branch + run: echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> $GITHUB_OUTPUT dask-tests: needs: setup @@ -28,7 +32,7 @@ # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) build_type: nightly - branch: main + branch: ${{ needs.setup.outputs.branch }} date: ${{ needs.setup.outputs.date }} sha: ${{ github.sha }} script: scripts/run.sh