-
Notifications
You must be signed in to change notification settings - Fork 1.5k
151 lines (142 loc) · 5.25 KB
/
Copy pathexamples-compat.yml
File metadata and controls
151 lines (142 loc) · 5.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# Workflow that exercises the legacy (backward-compatible) example scripts.
name: Examples - Backward Compatibility

# Title shown for each run. Dispatch-triggered runs include the pull request
# number, CI label and correlation id carried in the dispatch payload; every
# other trigger is labelled with the event name only.
run-name: >-
  ${{ github.event_name == 'repository_dispatch'
  && format('Backward Compatibility - PR #{0} - {1} - {2}',
  github.event.client_payload.pull_number,
  github.event.client_payload.ci_label,
  github.event.client_payload.correlation_id)
  || format('Backward Compatibility - {0}', github.event_name) }}

on:
  # Manual trigger.
  workflow_dispatch:
  # Dispatch events whose type is one of the two listed below.
  repository_dispatch:
    types:
      - ci-compat
      - ci-all
  schedule:
    # Daily at 22:00 UTC, which is 06:00 in UTC+8.
    - cron: "0 22 * * *"

# The workflow token is limited to reading repository contents.
permissions:
  contents: read
jobs:
  # Runs the legacy example scripts on the self-hosted GPU pool, once per
  # Python version / dependency-set pairing listed in the matrix.
  backward-compatibility:
    # Any non-dispatch trigger always runs; repository_dispatch events run
    # only when their action is ci-compat or ci-all.
    if: >
      github.event_name != 'repository_dispatch' ||
      github.event.action == 'ci-compat' ||
      github.event.action == 'ci-all'
    name: Backward Compatibility (Python ${{ matrix.python-version }}, ${{ matrix.setup-script }})
    runs-on: [self-hosted, 1ES.Pool=agl-runner-gpu]
    timeout-minutes: 30
    strategy:
      # One failing matrix entry must not cancel the other.
      fail-fast: false
      matrix:
        include:
          # Versions stay quoted so YAML does not read '3.10' as the float 3.1.
          - python-version: '3.10'
            setup-script: 'legacy'
          - python-version: '3.12'
            setup-script: 'stable'
    steps:
      # Runner diagnostics, printed before any real work starts.
      - name: Check GPU status
        run: nvidia-smi
      - name: Check disk space
        run: df -h

      # Ref precedence: the dispatch payload's pr_ref, then a pull request
      # merge ref when the event carries a pull_request object, then the ref
      # that triggered the run.
      - uses: actions/checkout@v6
        with:
          ref: ${{ github.event_name == 'repository_dispatch' && github.event.client_payload.pr_ref || (github.event.pull_request.number && format('refs/pull/{0}/merge', github.event.pull_request.number)) || github.ref }}
      - uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          python-version: ${{ matrix.python-version }}

      # The torch-gpu-* dependency group is selected by the matrix entry.
      - name: Sync dependencies
        run: |
          uv sync --frozen --no-default-groups --extra apo --extra verl \
            --group dev --group experiment --group agents --group torch-gpu-${{ matrix.setup-script }}

      # Only the 'stable' matrix entry replaces verl and vllm with these pins.
      - name: Override VERL (stable)
        if: matrix.setup-script == 'stable'
        run: |
          uv pip install verl==0.5.0 vllm==0.10.2

      # Records the resolved package set, then exports UV_LOCKED and
      # UV_NO_SYNC to every later step through GITHUB_ENV.
      # NOTE(review): presumably this stops later `uv run` calls from
      # re-syncing and undoing the override above; confirm against uv docs.
      # "$GITHUB_ENV" is quoted so a runner path containing whitespace cannot
      # break the redirection.
      - name: Freeze dependencies
        run: |
          set -ex
          uv pip freeze | tee requirements-freeze.txt
          echo "UV_LOCKED=1" >> "$GITHUB_ENV"
          echo "UV_NO_SYNC=1" >> "$GITHUB_ENV"
      - name: Upload dependencies artifact
        uses: actions/upload-artifact@v6
        with:
          name: dependencies-backward-compatibility-${{ matrix.python-version }}-${{ matrix.setup-script }}
          path: requirements-freeze.txt
          compression-level: 0

      # NOTE(review): later steps point their OpenAI settings at
      # http://localhost:12306/, presumably the address this proxy listens
      # on; confirm in scripts/litellm_run.sh.
      - name: Launch LiteLLM Proxy
        run: |
          ./scripts/litellm_run.sh
        env:
          AZURE_API_BASE: ${{ secrets.AZURE_GROUP_SUBSCRIPTION_API_BASE }}
          AZURE_API_KEY: ${{ secrets.AZURE_GROUP_SUBSCRIPTION_API_KEY }}

      # Downloads the dataset archive and unpacks it into examples/calc_x/data.
      - name: Prepare Calc-X dataset
        run: |
          set -ex
          cd examples/calc_x
          uv run gdown --fuzzy https://drive.google.com/file/d/1FQMyKLLd6hP9dw9rfZn1EZOWNvKaDsqw/view
          unzip calc-x-data.zip -d data
          rm calc-x-data.zip

      # The client runs in the background while the server runs in the
      # foreground; once the server returns, the client is signalled and the
      # step polls until no matching process remains.
      - name: APO example (legacy client-server style)
        run: |
          set -ex
          cd examples/apo
          uv run legacy_apo_client.py &
          sleep 3 # Wait for the client to be up
          uv run legacy_apo_server.py
          pkill -f legacy_apo_client.py && echo "SIGTERM sent to legacy_apo_client.py" || echo "No legacy_apo_client.py process found"
          while pgrep -f legacy_apo_client.py; do
            echo "Waiting for legacy_apo_client.py to finish..."
            sleep 5
          done
          echo "legacy_apo_client.py has finished."
          sleep 10
        env:
          OPENAI_API_BASE: http://localhost:12306/
          OPENAI_API_KEY: dummy

      - name: Calc-X MCP sanity check
        run: |
          set -ex
          cd examples/calc_x
          uv run tests/test_mcp_calculator.py
        env:
          OPENAI_API_BASE: http://localhost:12306/
          OPENAI_API_KEY: dummy

      # NOTE(review): this step sets OPENAI_BASE_URL where the two steps above
      # set OPENAI_API_BASE; confirm that is what the script reads.
      - name: Calc-X sanity check
        run: |
          set -ex
          cd examples/calc_x
          uv run legacy_calc_agent_debug.py
        env:
          OPENAI_BASE_URL: http://localhost:12306/
          OPENAI_API_KEY: dummy

      # Same background/foreground pattern as the APO step: the agent runs in
      # the background while legacy_train.sh runs in the foreground.
      # NOTE(review): the validation step reads this step's project_name and
      # run_name outputs, which nothing in this inline script writes;
      # presumably legacy_train.sh appends them to GITHUB_OUTPUT. Confirm.
      - name: Calc-X training (legacy client-server style)
        id: calc_x_train
        shell: bash
        run: |
          set -ex
          source .venv/bin/activate
          cd examples/calc_x
          ../../scripts/restart_ray.sh
          sleep 5
          PYTHONUNBUFFERED=1 python legacy_calc_agent.py &
          bash legacy_train.sh
          pkill -f legacy_calc_agent.py && echo "SIGTERM sent to legacy_calc_agent.py" || echo "No legacy_calc_agent.py process found"
          while pgrep -f legacy_calc_agent.py; do
            echo "Waiting for legacy_calc_agent.py to finish..."
            sleep 5
          done
          echo "legacy_calc_agent.py has finished."
          sleep 10
        env:
          WANDB_BASE_URL: ${{ secrets.MSR_WANDB_BASE_URL }}
          WANDB_API_KEY: ${{ secrets.MSR_WANDB_API_KEY }}

      # The training step's outputs reach the script as quoted environment
      # variables rather than being pasted into the script text, so their
      # content is never parsed as shell code or word-split.
      - name: Validate Calc-X training
        run: |
          set -ex
          uv run scripts/validate_example_wandb.py "$CALC_X_PROJECT_NAME" "$CALC_X_RUN_NAME"
        env:
          CALC_X_PROJECT_NAME: ${{ steps.calc_x_train.outputs.project_name }}
          CALC_X_RUN_NAME: ${{ steps.calc_x_train.outputs.run_name }}
          WANDB_BASE_URL: ${{ secrets.MSR_WANDB_BASE_URL }}
          WANDB_API_KEY: ${{ secrets.MSR_WANDB_API_KEY }}