Skip to content

Commit bd8a0a2

Browse files
Merge pull request #1 from TomAugspurger/tom/initial-setup
Initial setup for dask-upstream-testing
2 parents 5f105bc + ff9bfda commit bd8a0a2

File tree

7 files changed

+116
-108
lines changed

7 files changed

+116
-108
lines changed

.github/workflows/cron.yaml

Lines changed: 35 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,38 @@
1-
on:
2-
# TODO: change to cron-based schedule once this is working
3-
push:
4-
branches:
5-
- main
1+
# Based off https://github.com/rapidsai/cudf/blob/branch-25.02/.github/workflows/pandas-tests.yaml
2+
name: Test dask-upstream
63

4+
on:
5+
schedule:
6+
# 18:15 UTC daily.
7+
# We want to run after the nightly pipeline finishes.
8+
# https://github.com/rapidsai/workflows/blob/main/.github/workflows/nightly-pipeline-trigger.yaml is
9+
# currently set to 5:00 UTC and takes ~12 hours
10+
- cron: "15 18 * * *"
11+
712
jobs:
8-
test:
9-
name: "Test dask and distributed"
10-
# TODO: change to appropriate image
11-
runs-on: "linux-amd64-gpu-v100-latest-1"
12-
container:
13-
image: rapidsai/distributed:24.12-cuda11.8.0-devel-ubuntu20.04-py3.12
14-
env:
15-
NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
13+
setup:
14+
runs-on: ubuntu-latest
15+
outputs:
16+
date: ${{ steps.date.outputs.date }}
17+
branch: ${{ steps.branch.outputs.branch }}
1618
steps:
17-
- name: Checkout ourselves
18-
uses: actions/checkout@v4
19-
with:
20-
path: utils
21-
- name: Checkout dask
22-
uses: actions/checkout@v4
23-
with:
24-
repository: dask/dask
25-
path: dask
26-
- name: Checkout distributed
27-
uses: actions/checkout@v4
28-
with:
29-
repository: dask/distributed
30-
path: distributed
31-
- name: Run
32-
run: |
33-
(cd dask; git rev-parse HEAD;
34-
cd ../distributed; git rev-parse HEAD) | tee commit-hashes.txt
35-
- name: Upload commit hashes
36-
uses: actions/upload-artifact@v4
37-
with:
38-
name: commit-hashes.txt
39-
path: commit-hashes.txt
40-
- name: Setup python
41-
uses: actions/setup-python@v5
42-
with:
43-
python-version: 3.12
44-
- name: Get last artifact URL from last run
45-
id: get_last_id
46-
run: |
47-
pip install requests
48-
VAL=$(python utils/get.py)
49-
echo "${VAL}"
50-
echo "${VAL}" >> $GITHUB_OUTPUT
51-
- name: Download artifact from last run if exists
52-
if: ${{ fromJSON(steps.get_last_id.outputs.INFO).exists }}
53-
continue-on-error: true
54-
uses: actions/download-artifact@v4
55-
with:
56-
name: commit-hashes.txt
57-
path: previous-run
58-
github-token: ${{ secrets.GITHUB_TOKEN }}
59-
run-id: ${{ fromJSON(steps.get_last_id.outputs.INFO).id }}
60-
- name: Check if test run is needed
61-
id: check_run_needed
62-
run: |
63-
ls -l previous-run/
64-
if [ ! -f previous-run/commit-hashes.txt ]; then
65-
echo "No previous run hashes, need to re-run"
66-
echo 'INFO={"rerun": true}' >> $GITHUB_OUTPUT
67-
elif cmp -s commit-hashes.txt previous-run/commit-hashes.txt; then
68-
echo "Previous run hash same as this one, no need to re-run"
69-
echo 'INFO={"rerun": false}' >> $GITHUB_OUTPUT
70-
else
71-
echo "Previous run hash different, need to re-run"
72-
echo 'INFO={"rerun": true}' >> $GITHUB_OUTPUT
73-
fi
74-
- name: Run tests
75-
if: ${{ fromJSON(steps.check_run_needed.outputs.INFO).rerun }}
76-
run: |
77-
echo Running tests
78-
nvidia-smi
19+
- name: Get current date
20+
id: date
21+
run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"
22+
- name: Get current branch
23+
id: branch
24+
run: echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> $GITHUB_OUTPUT
25+
26+
dask-tests:
27+
needs: setup
28+
# run the Dask and Distributed unit tests
29+
secrets: inherit
30+
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02
31+
with:
32+
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
33+
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
34+
build_type: nightly
35+
branch: ${{ needs.setup.outputs.branch }}
36+
date: ${{ needs.setup.outputs.date }}
37+
sha: ${{ github.sha }}
38+
script: scripts/run.sh

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
dask
2+
distributed

.pre-commit-config.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION.
2+
3+
repos:
4+
- repo: https://github.com/pre-commit/pre-commit-hooks
5+
rev: v5.0.0
6+
hooks:
7+
- id: trailing-whitespace
8+
exclude: |
9+
(?x)^(
10+
^cpp/cmake/thirdparty/patches/.*|
11+
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
12+
)
13+
- id: end-of-file-fixer
14+
exclude: |
15+
(?x)^(
16+
^cpp/cmake/thirdparty/patches/.*|
17+
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
18+
)

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,9 @@
11
# Dask Upstream Testing
2+
3+
This repository contains the scripts to run Dask's `gpu`-marked tests on a schedule.
4+
5+
## Version Policy
6+
7+
The primary goal here is to quickly identify breakages in tests defined in `dask/dask` and `dask/distributed`, so we'll use the latest `main` from each of those.
8+
9+
When breakages occur, they'll generally be fixed either in Dask or in the nightly versions of the downstream packages (rapids, cupy, numba, etc.). And so we install the nightly (rather than `latest`) version of the downstream packages.

get.py

Lines changed: 0 additions & 33 deletions
This file was deleted.

scripts/run.sh

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/usr/bin/env bash
2+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
3+
4+
# Install
5+
set -euo pipefail
6+
7+
RAPIDS_PY_CUDA_SUFFIX="cu${RAPIDS_CUDA_VERSION:-12}"
8+
9+
# TODO: set this to main once dask-cudf is compatible
10+
# DASK_VERSION=main
11+
DASK_VERSION=2024.12.1
12+
export PIP_YES=true
13+
export PIP_PRE=true
14+
15+
pip install --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple \
16+
"cudf-${RAPIDS_PY_CUDA_SUFFIX}" \
17+
"dask-cudf-${RAPIDS_PY_CUDA_SUFFIX}" \
18+
"ucx-py-${RAPIDS_PY_CUDA_SUFFIX}" \
19+
"scipy" \
20+
"dask-cuda"
21+
22+
echo "Installing dask@${DASK_VERSION}"
23+
24+
if [ ! -d "dask" ]; then
25+
git clone https://github.com/dask/dask
26+
fi
27+
28+
if [ ! -d "distributed" ]; then
29+
git clone https://github.com/dask/distributed
30+
fi
31+
32+
pip uninstall dask distributed
33+
cd dask && git clean -fdx && git checkout $DASK_VERSION && pip install -e .[test] && cd ..
34+
cd distributed && git clean -fdx && git checkout $DASK_VERSION && pip install -e . && cd ..
35+
36+
./scripts/test.sh

scripts/test.sh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/usr/bin/env bash
2+
# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES.
3+
4+
pushd dask
5+
pytest dask -v -m gpu
6+
dask_status=$?
7+
popd
8+
9+
pushd distributed
10+
pytest distributed -v -m gpu --runslow
11+
distributed_status=$?
12+
popd
13+
14+
if [ $dask_status -ne 0 ] || [ $distributed_status -ne 0 ]; then
15+
echo "Tests failed"
16+
exit 1
17+
fi

0 commit comments

Comments
 (0)