Skip to content

Commit a0ac45a

Browse files
authored
[Feature] Add plugin contract test suite (#1652)
1 parent 52fc971 commit a0ac45a

File tree

13 files changed

+1294
-27
lines changed

13 files changed

+1294
-27
lines changed

.github/workflows/pr-test.yml

Lines changed: 149 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ concurrency:
2626
jobs:
2727

2828
e2e-test-short:
29+
2930
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-short'))
31+
3032
runs-on: self-hosted
3133
container:
3234
image: slimerl/slime:latest
@@ -62,18 +64,29 @@ jobs:
6264

6365
steps:
6466
- name: Checkout repository
65-
uses: actions/checkout@v6
67+
uses: actions/checkout@v4
6668

6769
- name: Install
6870
shell: bash
6971
run: cd $GITHUB_WORKSPACE && pip install -e . --no-deps --break-system-packages
7072

7173
- name: Execute
7274
shell: bash
73-
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
75+
run: |
76+
TEST_PATH="${{ matrix.info.test_file }}"
77+
if [[ "$TEST_PATH" != tests/* ]]; then
78+
TEST_PATH="tests/$TEST_PATH"
79+
fi
80+
if [ "${{ matrix.info.num_gpus }}" = "0" ]; then
81+
python "$TEST_PATH"
82+
else
83+
python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python "$TEST_PATH"
84+
fi
7485
7586
e2e-test-fsdp:
87+
7688
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-fsdp'))
89+
7790
runs-on: self-hosted
7891
container:
7992
image: slimerl/slime:latest
@@ -109,18 +122,29 @@ jobs:
109122

110123
steps:
111124
- name: Checkout repository
112-
uses: actions/checkout@v6
125+
uses: actions/checkout@v4
113126

114127
- name: Install
115128
shell: bash
116129
run: cd $GITHUB_WORKSPACE && pip install -e . --no-deps --break-system-packages
117130

118131
- name: Execute
119132
shell: bash
120-
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
133+
run: |
134+
TEST_PATH="${{ matrix.info.test_file }}"
135+
if [[ "$TEST_PATH" != tests/* ]]; then
136+
TEST_PATH="tests/$TEST_PATH"
137+
fi
138+
if [ "${{ matrix.info.num_gpus }}" = "0" ]; then
139+
python "$TEST_PATH"
140+
else
141+
python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python "$TEST_PATH"
142+
fi
121143
122144
e2e-test-megatron:
145+
123146
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-megatron'))
147+
124148
runs-on: self-hosted
125149
container:
126150
image: slimerl/slime:latest
@@ -156,18 +180,29 @@ jobs:
156180

157181
steps:
158182
- name: Checkout repository
159-
uses: actions/checkout@v6
183+
uses: actions/checkout@v4
160184

161185
- name: Install
162186
shell: bash
163187
run: cd $GITHUB_WORKSPACE && pip install -e . --no-deps --break-system-packages
164188

165189
- name: Execute
166190
shell: bash
167-
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
191+
run: |
192+
TEST_PATH="${{ matrix.info.test_file }}"
193+
if [[ "$TEST_PATH" != tests/* ]]; then
194+
TEST_PATH="tests/$TEST_PATH"
195+
fi
196+
if [ "${{ matrix.info.num_gpus }}" = "0" ]; then
197+
python "$TEST_PATH"
198+
else
199+
python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python "$TEST_PATH"
200+
fi
168201
169202
e2e-test-precision:
203+
170204
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-precision'))
205+
171206
runs-on: self-hosted
172207
container:
173208
image: slimerl/slime:latest
@@ -203,18 +238,29 @@ jobs:
203238

204239
steps:
205240
- name: Checkout repository
206-
uses: actions/checkout@v6
241+
uses: actions/checkout@v4
207242

208243
- name: Install
209244
shell: bash
210245
run: cd $GITHUB_WORKSPACE && pip install -e . --no-deps --break-system-packages
211246

212247
- name: Execute
213248
shell: bash
214-
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
249+
run: |
250+
TEST_PATH="${{ matrix.info.test_file }}"
251+
if [[ "$TEST_PATH" != tests/* ]]; then
252+
TEST_PATH="tests/$TEST_PATH"
253+
fi
254+
if [ "${{ matrix.info.num_gpus }}" = "0" ]; then
255+
python "$TEST_PATH"
256+
else
257+
python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python "$TEST_PATH"
258+
fi
215259
216260
e2e-test-ckpt:
261+
217262
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-ckpt'))
263+
218264
runs-on: self-hosted
219265
container:
220266
image: slimerl/slime:latest
@@ -250,18 +296,87 @@ jobs:
250296

251297
steps:
252298
- name: Checkout repository
253-
uses: actions/checkout@v6
299+
uses: actions/checkout@v4
254300

255301
- name: Install
256302
shell: bash
257303
run: cd $GITHUB_WORKSPACE && pip install -e . --no-deps --break-system-packages
258304

259305
- name: Execute
260306
shell: bash
261-
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
307+
run: |
308+
TEST_PATH="${{ matrix.info.test_file }}"
309+
if [[ "$TEST_PATH" != tests/* ]]; then
310+
TEST_PATH="tests/$TEST_PATH"
311+
fi
312+
if [ "${{ matrix.info.num_gpus }}" = "0" ]; then
313+
python "$TEST_PATH"
314+
else
315+
python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python "$TEST_PATH"
316+
fi
317+
318+
e2e-test-plugin-contracts:
319+
320+
if: github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch'
321+
322+
runs-on: self-hosted
323+
container:
324+
image: slimerl/slime:latest
325+
options: >
326+
--gpus all
327+
--ipc=host
328+
--shm-size=16g
329+
--ulimit memlock=-1
330+
--ulimit stack=67108864
331+
--memory=0
332+
--memory-swap=0
333+
-e http_proxy=$http_proxy
334+
-e https_proxy=$https_proxy
335+
-e HTTP_PROXY=$HTTP_PROXY
336+
-e HTTPS_PROXY=$HTTPS_PROXY
337+
-v /mnt/nvme0n1/slime_ci:/data/slime_ci
338+
-v /mnt/nvme0n1/slime_ci/models:/root/models
339+
-v /mnt/nvme0n1/slime_ci/datasets:/root/datasets
340+
strategy:
341+
fail-fast: false
342+
matrix:
343+
info: [{"num_gpus": 0, "test_file": "plugin_contracts/test_plugin_rollout_contracts.py"}, {"num_gpus": 0, "test_file": "plugin_contracts/test_plugin_runtime_hook_contracts.py"}, {"num_gpus": 0, "test_file": "plugin_contracts/test_plugin_path_loading_contracts.py"}, {"num_gpus": 0, "test_file": "plugin_contracts/test_plugin_generate_contracts.py"}]
344+
defaults:
345+
run:
346+
working-directory: ${{ github.workspace }}
347+
env:
348+
GITHUB_COMMIT_NAME: ${{ github.sha }}_${{ github.event.pull_request.number || 'non-pr' }}
349+
WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
350+
SLIME_TEST_ENABLE_INFINITE_RUN: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.infinite_run) || 'false' }}
351+
SLIME_TEST_USE_DEEPEP: ${{ matrix.info.use_deepep || '0' }}
352+
SLIME_TEST_USE_FP8_ROLLOUT: ${{ matrix.info.use_fp8_rollout || '0' }}
353+
SLIME_TEST_ENABLE_EVAL: ${{ matrix.info.enable_eval || '1' }}
354+
355+
steps:
356+
- name: Checkout repository
357+
uses: actions/checkout@v4
358+
359+
- name: Install
360+
shell: bash
361+
run: cd $GITHUB_WORKSPACE && pip install -e . --no-deps --break-system-packages
362+
363+
- name: Execute
364+
shell: bash
365+
run: |
366+
TEST_PATH="${{ matrix.info.test_file }}"
367+
if [[ "$TEST_PATH" != tests/* ]]; then
368+
TEST_PATH="tests/$TEST_PATH"
369+
fi
370+
if [ "${{ matrix.info.num_gpus }}" = "0" ]; then
371+
python "$TEST_PATH"
372+
else
373+
python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python "$TEST_PATH"
374+
fi
262375
263376
e2e-test-image:
377+
264378
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-image'))
379+
265380
runs-on: self-hosted
266381
container:
267382
image: slimerl/slime-test:latest
@@ -297,15 +412,24 @@ jobs:
297412

298413
steps:
299414
- name: Checkout repository
300-
uses: actions/checkout@v6
415+
uses: actions/checkout@v4
301416

302417
- name: Install
303418
shell: bash
304419
run: cd $GITHUB_WORKSPACE && pip install -e . --no-deps --break-system-packages
305420

306421
- name: Execute
307422
shell: bash
308-
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
423+
run: |
424+
TEST_PATH="${{ matrix.info.test_file }}"
425+
if [[ "$TEST_PATH" != tests/* ]]; then
426+
TEST_PATH="tests/$TEST_PATH"
427+
fi
428+
if [ "${{ matrix.info.num_gpus }}" = "0" ]; then
429+
python "$TEST_PATH"
430+
else
431+
python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python "$TEST_PATH"
432+
fi
309433
310434
311435
e2e-test-changed-detect:
@@ -334,7 +458,7 @@ jobs:
334458
id: detect
335459
shell: bash
336460
run: |
337-
CHANGED=$(git diff --name-only --diff-filter=AM origin/main...HEAD -- 'tests/test_*.py' || true)
461+
CHANGED=$(git diff --name-only --diff-filter=AM origin/main...HEAD -- 'tests/test_*.py' 'tests/plugin_contracts/test_*.py' || true)
338462
if [ -z "$CHANGED" ]; then
339463
echo "No new or modified test files found."
340464
echo "has_tests=false" >> $GITHUB_OUTPUT
@@ -345,12 +469,11 @@ jobs:
345469
MATRIX="["
346470
FIRST=true
347471
for filepath in $CHANGED; do
348-
filename=$(basename "$filepath")
349472
# Extract NUM_GPUS from the test file, default to 8
350473
NGPU=$(grep -oP '^NUM_GPUS\s*=\s*\K\d+' "$filepath" | head -1)
351474
NGPU=${NGPU:-8}
352475
if [ "$FIRST" = true ]; then FIRST=false; else MATRIX+=","; fi
353-
MATRIX+="{\"test_file\":\"$filename\",\"num_gpus\":$NGPU}"
476+
MATRIX+="{\"test_file\":\"$filepath\",\"num_gpus\":$NGPU}"
354477
done
355478
MATRIX+="]"
356479
echo "has_tests=true" >> $GITHUB_OUTPUT
@@ -395,12 +518,21 @@ jobs:
395518

396519
steps:
397520
- name: Checkout repository
398-
uses: actions/checkout@v6
521+
uses: actions/checkout@v4
399522

400523
- name: Install
401524
shell: bash
402525
run: cd $GITHUB_WORKSPACE && pip install -e . --no-deps --break-system-packages
403526

404527
- name: Execute
405528
shell: bash
406-
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
529+
run: |
530+
TEST_PATH="${{ matrix.info.test_file }}"
531+
if [[ "$TEST_PATH" != tests/* ]]; then
532+
TEST_PATH="tests/$TEST_PATH"
533+
fi
534+
if [ "${{ matrix.info.num_gpus }}" = "0" ]; then
535+
python "$TEST_PATH"
536+
else
537+
python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python "$TEST_PATH"
538+
fi

.github/workflows/pr-test.yml.j2

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,17 @@
4747
],
4848
},
4949

50+
'e2e-test-plugin-contracts': {
51+
'label': 'run-ci-plugin-contracts',
52+
'always': True,
53+
'tests': [
54+
{'test_file': 'plugin_contracts/test_plugin_rollout_contracts.py', 'num_gpus': 0},
55+
{'test_file': 'plugin_contracts/test_plugin_runtime_hook_contracts.py', 'num_gpus': 0},
56+
{'test_file': 'plugin_contracts/test_plugin_path_loading_contracts.py', 'num_gpus': 0},
57+
{'test_file': 'plugin_contracts/test_plugin_generate_contracts.py', 'num_gpus': 0},
58+
],
59+
},
60+
5061
'e2e-test-image': {
5162
'label': 'run-ci-image',
5263
'image': 'slimerl/slime-test:latest',
@@ -93,7 +104,11 @@ concurrency:
93104
jobs:
94105
<% for job_name, config in jobs.items() %>
95106
<< job_name >>:
107+
<% if config.get('always') %>
108+
if: github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch'
109+
<% else %>
96110
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, '<< config.label >>'))
111+
<% endif %>
97112
runs-on: self-hosted
98113
container:
99114
image: << config.image if config.image else 'slimerl/slime:latest' >>
@@ -137,7 +152,16 @@ jobs:
137152

138153
- name: Execute
139154
shell: bash
140-
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
155+
run: |
156+
TEST_PATH="${{ matrix.info.test_file }}"
157+
if [[ "$TEST_PATH" != tests/* ]]; then
158+
TEST_PATH="tests/$TEST_PATH"
159+
fi
160+
if [ "${{ matrix.info.num_gpus }}" = "0" ]; then
161+
python "$TEST_PATH"
162+
else
163+
python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python "$TEST_PATH"
164+
fi
141165
<% endfor %>
142166

143167
e2e-test-changed-detect:
@@ -166,7 +190,7 @@ jobs:
166190
id: detect
167191
shell: bash
168192
run: |
169-
CHANGED=$(git diff --name-only --diff-filter=AM origin/main...HEAD -- 'tests/test_*.py' || true)
193+
CHANGED=$(git diff --name-only --diff-filter=AM origin/main...HEAD -- 'tests/test_*.py' 'tests/plugin_contracts/test_*.py' || true)
170194
if [ -z "$CHANGED" ]; then
171195
echo "No new or modified test files found."
172196
echo "has_tests=false" >> $GITHUB_OUTPUT
@@ -177,12 +201,11 @@ jobs:
177201
MATRIX="["
178202
FIRST=true
179203
for filepath in $CHANGED; do
180-
filename=$(basename "$filepath")
181204
# Extract NUM_GPUS from the test file, default to 8
182205
NGPU=$(grep -oP '^NUM_GPUS\s*=\s*\K\d+' "$filepath" | head -1)
183206
NGPU=${NGPU:-8}
184207
if [ "$FIRST" = true ]; then FIRST=false; else MATRIX+=","; fi
185-
MATRIX+="{\"test_file\":\"$filename\",\"num_gpus\":$NGPU}"
208+
MATRIX+="{\"test_file\":\"$filepath\",\"num_gpus\":$NGPU}"
186209
done
187210
MATRIX+="]"
188211
echo "has_tests=true" >> $GITHUB_OUTPUT
@@ -235,4 +258,13 @@ jobs:
235258

236259
- name: Execute
237260
shell: bash
238-
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
261+
run: |
262+
TEST_PATH="${{ matrix.info.test_file }}"
263+
if [[ "$TEST_PATH" != tests/* ]]; then
264+
TEST_PATH="tests/$TEST_PATH"
265+
fi
266+
if [ "${{ matrix.info.num_gpus }}" = "0" ]; then
267+
python "$TEST_PATH"
268+
else
269+
python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python "$TEST_PATH"
270+
fi

0 commit comments

Comments
 (0)