Skip to content

Commit 5dce4fb

Browse files
author
Raunak Bhagat
authored
[FEAT] Add steps to spin up, submit job, and spin down ray clusters (Eventual-Inc#3403)
# Overview

New steps that:

- spin up a ray cluster
- submit a job to it
- spin down the ray cluster

## Note

If any of the previous steps fail, the "tear-down" step (responsible for tearing down the ray cluster) will still always run. (The only way this tear-down step would not be run is if the workflow is *manually* cancelled.)
1 parent 31a7abc commit 5dce4fb

File tree

4 files changed

+132
-34
lines changed

4 files changed

+132
-34
lines changed
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
# Ray autoscaler cluster definition used by the `run-cluster` CI workflow.
# Tokens wrapped in double curly braces are placeholders; the workflow rewrites
# them with `sed` before calling `ray up` / `ray down` on this file.
cluster_name: '{{RAY_CLUSTER_NAME}}'

provider:
  type: aws
  region: us-west-2
  # The cluster name is unique per CI run, so a stopped node can never be
  # picked up again by a later run. Terminate nodes on `ray down` instead of
  # leaving stopped instances (and their volumes) behind.
  cache_stopped_nodes: false
  security_group:
    GroupName: ray-autoscaler-c1

auth:
  ssh_user: ubuntu
  # Must match the path the workflow writes the downloaded key to.
  ssh_private_key: ~/.ssh/ci-github-actions-ray-cluster-key.pem

max_workers: 2
available_node_types:
  ray.head.default:
    # Advertise zero CPUs on the head node.
    resources: {"CPU": 0}
    node_config:
      KeyName: ci-github-actions-ray-cluster-key
      InstanceType: i3.2xlarge
      ImageId: ami-04dd23e62ed049936
      IamInstanceProfile:
        Name: ray-autoscaler-v1

  ray.worker.default:
    # min == max: a fixed pool of two workers.
    min_workers: 2
    max_workers: 2
    resources: {}
    node_config:
      KeyName: ci-github-actions-ray-cluster-key
      InstanceType: i3.2xlarge
      ImageId: ami-04dd23e62ed049936
      IamInstanceProfile:
        Name: ray-autoscaler-v1

setup_commands:
  # Mount drive: format /dev/nvme0n1 and mount it on /tmp, but only when
  # nothing is mounted on /tmp yet.
  - |
    findmnt /tmp 1> /dev/null
    code=$?
    if [ $code -ne 0 ]; then
      sudo mkfs.ext4 /dev/nvme0n1
      sudo mount -t ext4 /dev/nvme0n1 /tmp
      sudo chmod 777 /tmp
    fi
  # Install dependencies
  # GitHub Actions workflow will replace all parameters between `{{...}}` with the
  # actual values as determined dynamically during runtime of the actual workflow.
  - sudo snap install aws-cli --classic
  - curl -LsSf https://astral.sh/uv/install.sh | sh
  - echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc
  - source ~/.bashrc
  - uv python install {{PYTHON_VERSION}}
  - uv python pin {{PYTHON_VERSION}}
  - uv v
  - echo "source $HOME/.venv/bin/activate" >> $HOME/.bashrc
  - source .venv/bin/activate
  # The extras spec is quoted so the shell cannot treat `[default]` as a glob.
  - uv pip install pip 'ray[default]' py-spy getdaft{{DAFT_VERSION}}

.github/workflows/build-commit.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Build a Daft commit and store the outputted wheel in AWS S3
1+
name: build-commit
22

33
on:
44
workflow_dispatch:

.github/workflows/run-cluster.yaml

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
# Manually triggered workflow that spins up a Ray cluster on AWS, submits a
# job to it, and spins the cluster down again.
name: run-cluster

on:
  workflow_dispatch:
    inputs:
      daft_version:
        type: string
        description: The wheel artifact to use
        required: false
      python_version:
        type: string
        description: The version of python to use
        required: false
        default: "3.9"

jobs:
  run-command:
    runs-on: [self-hosted, linux, x64, ci-dev]
    timeout-minutes: 15  # Remove for ssh debugging
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4
        with:
          fetch-depth: 1
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-region: us-west-2
          role-session-name: run-command-workflow
      - name: Install uv, rust, python
        uses: ./.github/actions/install
        with:
          python_version: ${{ inputs.python_version }}
      - name: Setup uv environment
        run: |
          uv v
          source .venv/bin/activate
          # Quoted so bash cannot treat `[default]` as a glob pattern.
          uv pip install 'ray[default]' boto3
      - name: Dynamically update ray config file
        # User-supplied inputs are handed over as environment variables instead
        # of being spliced into the script text, so their contents cannot be
        # interpreted as shell code.
        env:
          RAY_CONFIG: .github/assets/benchmarking_ray_config.yaml
          PYTHON_VERSION: ${{ inputs.python_version }}
          DAFT_VERSION: ${{ inputs.daft_version }}
        run: |
          # One cluster name per run attempt.
          id="ray-ci-run-${{ github.run_id }}_${{ github.run_attempt }}"
          sed -i "s|{{RAY_CLUSTER_NAME}}|$id|g" "$RAY_CONFIG"
          sed -i "s|{{PYTHON_VERSION}}|$PYTHON_VERSION|g" "$RAY_CONFIG"
          # Pin getdaft to the requested version, or leave it unpinned.
          if [[ -n "$DAFT_VERSION" ]]; then
            sed -i "s|{{DAFT_VERSION}}|==$DAFT_VERSION|g" "$RAY_CONFIG"
          else
            sed -i 's|{{DAFT_VERSION}}||g' "$RAY_CONFIG"
          fi
      - name: Download private ssh key
        run: |
          # Files created from here on are readable by the owner only, so the
          # key is never world-readable, not even before the chmod below.
          umask 077
          mkdir -p ~/.ssh
          KEY=$(aws secretsmanager get-secret-value \
            --secret-id ci-github-actions-ray-cluster-key-3 \
            --query SecretString \
            --output text)
          # Overwrite rather than append: the runner is self-hosted, so a key
          # file from a previous run may still exist.
          echo "$KEY" > ~/.ssh/ci-github-actions-ray-cluster-key.pem
          chmod 600 ~/.ssh/ci-github-actions-ray-cluster-key.pem
      - name: Spin up ray cluster
        run: |
          source .venv/bin/activate
          ray up .github/assets/benchmarking_ray_config.yaml -y
      - name: Setup connection to ray cluster
        # NOTE(review): the command is started in the background and the next
        # step talks to localhost:8265 right away; confirm that the connection
        # is reliably ready by then.
        run: |
          source .venv/bin/activate
          ray dashboard .github/assets/benchmarking_ray_config.yaml &
      - name: Submit job to ray cluster
        run: |
          source .venv/bin/activate
          ray job submit --address http://localhost:8265 -- python -c "print('Hello, world!')"
      - name: Spin down ray cluster
        # Runs even when an earlier step failed, so the cluster is not left up.
        if: always()
        run: |
          source .venv/bin/activate
          ray down .github/assets/benchmarking_ray_config.yaml -y

.github/workflows/run-command-on-ray.yaml

Lines changed: 0 additions & 33 deletions
This file was deleted.

0 commit comments

Comments
 (0)