Skip to content

Commit 982fc3b

Browse files
committed
Add plugin contract test suite
1 parent 52fc971 commit 982fc3b

21 files changed

+1811
-20
lines changed

.github/workflows/pr-test.yml

Lines changed: 56 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,16 @@ jobs:
7070

7171
- name: Execute
7272
shell: bash
73-
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
73+
run: |
74+
TEST_PATH="${{ matrix.info.test_file }}"
75+
if [[ "$TEST_PATH" != tests/* ]]; then
76+
TEST_PATH="tests/$TEST_PATH"
77+
fi
78+
if [ "${{ matrix.info.num_gpus }}" = "0" ]; then
79+
python "$TEST_PATH"
80+
else
81+
python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python "$TEST_PATH"
82+
fi
7483
7584
e2e-test-fsdp:
7685
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-fsdp'))
@@ -117,7 +126,16 @@ jobs:
117126

118127
- name: Execute
119128
shell: bash
120-
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
129+
run: |
130+
TEST_PATH="${{ matrix.info.test_file }}"
131+
if [[ "$TEST_PATH" != tests/* ]]; then
132+
TEST_PATH="tests/$TEST_PATH"
133+
fi
134+
if [ "${{ matrix.info.num_gpus }}" = "0" ]; then
135+
python "$TEST_PATH"
136+
else
137+
python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python "$TEST_PATH"
138+
fi
121139
122140
e2e-test-megatron:
123141
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-megatron'))
@@ -164,7 +182,12 @@ jobs:
164182

165183
- name: Execute
166184
shell: bash
167-
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
185+
run: |
186+
TEST_PATH="${{ matrix.info.test_file }}"
187+
if [[ "$TEST_PATH" != tests/* ]]; then
188+
TEST_PATH="tests/$TEST_PATH"
189+
fi
190+
python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python "$TEST_PATH"
168191
169192
e2e-test-precision:
170193
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-precision'))
@@ -211,7 +234,12 @@ jobs:
211234

212235
- name: Execute
213236
shell: bash
214-
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
237+
run: |
238+
TEST_PATH="${{ matrix.info.test_file }}"
239+
if [[ "$TEST_PATH" != tests/* ]]; then
240+
TEST_PATH="tests/$TEST_PATH"
241+
fi
242+
python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python "$TEST_PATH"
215243
216244
e2e-test-ckpt:
217245
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-ckpt'))
@@ -258,7 +286,12 @@ jobs:
258286

259287
- name: Execute
260288
shell: bash
261-
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
289+
run: |
290+
TEST_PATH="${{ matrix.info.test_file }}"
291+
if [[ "$TEST_PATH" != tests/* ]]; then
292+
TEST_PATH="tests/$TEST_PATH"
293+
fi
294+
python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python "$TEST_PATH"
262295
263296
e2e-test-image:
264297
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-image'))
@@ -305,7 +338,12 @@ jobs:
305338

306339
- name: Execute
307340
shell: bash
308-
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
341+
run: |
342+
TEST_PATH="${{ matrix.info.test_file }}"
343+
if [[ "$TEST_PATH" != tests/* ]]; then
344+
TEST_PATH="tests/$TEST_PATH"
345+
fi
346+
python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python "$TEST_PATH"
309347
310348
311349
e2e-test-changed-detect:
@@ -334,7 +372,7 @@ jobs:
334372
id: detect
335373
shell: bash
336374
run: |
337-
CHANGED=$(git diff --name-only --diff-filter=AM origin/main...HEAD -- 'tests/test_*.py' || true)
375+
CHANGED=$(git diff --name-only --diff-filter=AM origin/main...HEAD -- 'tests/test_*.py' 'tests/plugin_contracts/test_*.py' || true)
338376
if [ -z "$CHANGED" ]; then
339377
echo "No new or modified test files found."
340378
echo "has_tests=false" >> $GITHUB_OUTPUT
@@ -345,12 +383,11 @@ jobs:
345383
MATRIX="["
346384
FIRST=true
347385
for filepath in $CHANGED; do
348-
filename=$(basename "$filepath")
349386
# Extract NUM_GPUS from the test file, default to 8
350387
NGPU=$(grep -oP '^NUM_GPUS\s*=\s*\K\d+' "$filepath" | head -1)
351388
NGPU=${NGPU:-8}
352389
if [ "$FIRST" = true ]; then FIRST=false; else MATRIX+=","; fi
353-
MATRIX+="{\"test_file\":\"$filename\",\"num_gpus\":$NGPU}"
390+
MATRIX+="{\"test_file\":\"$filepath\",\"num_gpus\":$NGPU}"
354391
done
355392
MATRIX+="]"
356393
echo "has_tests=true" >> $GITHUB_OUTPUT
@@ -403,4 +440,13 @@ jobs:
403440

404441
- name: Execute
405442
shell: bash
406-
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
443+
run: |
444+
TEST_PATH="${{ matrix.info.test_file }}"
445+
if [[ "$TEST_PATH" != tests/* ]]; then
446+
TEST_PATH="tests/$TEST_PATH"
447+
fi
448+
if [ "${{ matrix.info.num_gpus }}" = "0" ]; then
449+
python "$TEST_PATH"
450+
else
451+
python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python "$TEST_PATH"
452+
fi

.github/workflows/pr-test.yml.j2

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,16 @@ jobs:
137137

138138
- name: Execute
139139
shell: bash
140-
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
140+
run: |
141+
TEST_PATH="${{ matrix.info.test_file }}"
142+
if [[ "$TEST_PATH" != tests/* ]]; then
143+
TEST_PATH="tests/$TEST_PATH"
144+
fi
145+
if [ "${{ matrix.info.num_gpus }}" = "0" ]; then
146+
python "$TEST_PATH"
147+
else
148+
python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python "$TEST_PATH"
149+
fi
141150
<% endfor %>
142151

143152
e2e-test-changed-detect:
@@ -166,7 +175,7 @@ jobs:
166175
id: detect
167176
shell: bash
168177
run: |
169-
CHANGED=$(git diff --name-only --diff-filter=AM origin/main...HEAD -- 'tests/test_*.py' || true)
178+
CHANGED=$(git diff --name-only --diff-filter=AM origin/main...HEAD -- 'tests/test_*.py' 'tests/plugin_contracts/test_*.py' || true)
170179
if [ -z "$CHANGED" ]; then
171180
echo "No new or modified test files found."
172181
echo "has_tests=false" >> $GITHUB_OUTPUT
@@ -177,12 +186,11 @@ jobs:
177186
MATRIX="["
178187
FIRST=true
179188
for filepath in $CHANGED; do
180-
filename=$(basename "$filepath")
181189
# Extract NUM_GPUS from the test file, default to 8
182190
NGPU=$(grep -oP '^NUM_GPUS\s*=\s*\K\d+' "$filepath" | head -1)
183191
NGPU=${NGPU:-8}
184192
if [ "$FIRST" = true ]; then FIRST=false; else MATRIX+=","; fi
185-
MATRIX+="{\"test_file\":\"$filename\",\"num_gpus\":$NGPU}"
193+
MATRIX+="{\"test_file\":\"$filepath\",\"num_gpus\":$NGPU}"
186194
done
187195
MATRIX+="]"
188196
echo "has_tests=true" >> $GITHUB_OUTPUT
@@ -235,4 +243,13 @@ jobs:
235243

236244
- name: Execute
237245
shell: bash
238-
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
246+
run: |
247+
TEST_PATH="${{ matrix.info.test_file }}"
248+
if [[ "$TEST_PATH" != tests/* ]]; then
249+
TEST_PATH="tests/$TEST_PATH"
250+
fi
251+
if [ "${{ matrix.info.num_gpus }}" = "0" ]; then
252+
python "$TEST_PATH"
253+
else
254+
python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python "$TEST_PATH"
255+
fi

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ celerybeat.pid
130130
# Environments
131131
.env
132132
.venv
133+
.venv*/
133134
env/
134135
venv/
135136
ENV/
@@ -191,4 +192,4 @@ local/
191192
glm/
192193
_examples_synced/
193194
.env
194-
.DS_Store
195+
.DS_Store

docs/en/developer_guide/ci.md

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ The workflow is defined in `.github/workflows/pr-test.yml` (auto-generated from
99
1. Runs on a self-hosted GPU runner inside a Docker container (`slimerl/slime:latest`).
1010
2. Installs slime with `pip install -e . --no-deps`.
1111
3. Acquires the required GPUs via `tests/ci/gpu_lock_exec.py --count <num_gpus>`.
12-
4. Executes the test file: `python tests/<test_file>.py`.
12+
4. Executes the test file: `python tests/<test_file>.py` for a test that lives directly under `tests/`, or `python <test_path>.py` for a test in a subdirectory such as `tests/plugin_contracts/`.
1313

1414
Each test file follows a standard pattern: a `prepare()` function downloads models/datasets, and an `execute()` function builds CLI arguments and calls `U.execute_train(...)`.
1515

@@ -35,7 +35,7 @@ All labels also run when triggered via `workflow_dispatch` (manual run from the
3535

3636
This is the most useful label for development. When you add a new test file or modify an existing one, just add `run-ci-changed` to your PR and CI will:
3737

38-
1. **Detect** which `tests/test_*.py` files are added or modified relative to `origin/main` (via `git diff --diff-filter=AM`).
38+
1. **Detect** which `tests/test_*.py` or `tests/plugin_contracts/test_*.py` files are added or modified relative to `origin/main` (via `git diff --diff-filter=AM`).
3939
2. **Extract** the `NUM_GPUS` value from each detected test file automatically.
4040
3. **Build** a dynamic GitHub Actions matrix and run each test in parallel.
4141

@@ -107,3 +107,27 @@ The workflow file `pr-test.yml` is auto-generated from the Jinja2 template `pr-t
107107
1. Edit `.github/workflows/pr-test.yml.j2`.
108108
2. Run `python .github/workflows/generate_github_workflows.py`.
109109
3. Commit both files.
110+
111+
## Customization Contract Tests
112+
113+
For CPU-only contract tests that validate hooks loaded from function paths, run:
114+
115+
```bash
116+
python -m pytest \
117+
tests/plugin_contracts/test_plugin_rollout_contracts.py \
118+
tests/plugin_contracts/test_plugin_eval_function_contracts.py \
119+
tests/plugin_contracts/test_plugin_generate_contracts.py \
120+
tests/plugin_contracts/test_plugin_custom_rm_contracts.py \
121+
tests/plugin_contracts/test_plugin_dynamic_filter_contracts.py \
122+
tests/plugin_contracts/test_plugin_buffer_filter_contracts.py \
123+
tests/plugin_contracts/test_plugin_data_source_contracts.py \
124+
tests/plugin_contracts/test_plugin_custom_rollout_log_contracts.py \
125+
tests/plugin_contracts/test_plugin_custom_eval_rollout_log_contracts.py \
126+
tests/plugin_contracts/test_plugin_custom_reward_post_process_contracts.py \
127+
tests/plugin_contracts/test_plugin_custom_convert_samples_to_train_data_contracts.py \
128+
tests/plugin_contracts/test_plugin_rollout_sample_filter_contracts.py \
129+
tests/plugin_contracts/test_plugin_rollout_all_samples_process_contracts.py \
130+
tests/plugin_contracts/test_plugin_rollout_data_postprocess_contracts.py
131+
```
132+
133+
These files also support direct execution as `python tests/plugin_contracts/<file>.py`. They declare `NUM_GPUS = 0`, so `run-ci-changed` can pick them up without treating them as GPU-heavy end-to-end tests.

docs/en/get_started/customization.md

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,3 +421,55 @@ Stabilize MoE RL training by recording and replaying expert routing decisions to
421421
| `--use-rollout-routing-replay` | R3: Replay routing from rollout during training. **Requires `--use-slime-router`**. ([arXiv:2510.11370](https://arxiv.org/abs/2510.11370)) |
422422

423423
For detailed explanation of R3 and SlimeRouter, see [Slime Router](../advanced/slime-router.md).
424+
425+
## Testing Custom Function Paths
426+
427+
slime also provides CPU-only contract tests for customization interfaces. These tests resolve components through import-path strings, so they can validate both built-in hooks and user-defined implementations passed through the same CLI arguments used by training.
428+
429+
The tests live under `tests/plugin_contracts/`, with one file per customization argument:
430+
431+
- `--rollout-function-path` -> `tests/plugin_contracts/test_plugin_rollout_contracts.py`
432+
- `--eval-function-path` -> `tests/plugin_contracts/test_plugin_eval_function_contracts.py`
433+
- `--custom-generate-function-path` -> `tests/plugin_contracts/test_plugin_generate_contracts.py`
434+
- `--custom-rm-path` -> `tests/plugin_contracts/test_plugin_custom_rm_contracts.py`
435+
- `--dynamic-sampling-filter-path` -> `tests/plugin_contracts/test_plugin_dynamic_filter_contracts.py`
436+
- `--buffer-filter-path` -> `tests/plugin_contracts/test_plugin_buffer_filter_contracts.py`
437+
- `--data-source-path` -> `tests/plugin_contracts/test_plugin_data_source_contracts.py`
438+
- `--custom-rollout-log-function-path` -> `tests/plugin_contracts/test_plugin_custom_rollout_log_contracts.py`
439+
- `--custom-eval-rollout-log-function-path` -> `tests/plugin_contracts/test_plugin_custom_eval_rollout_log_contracts.py`
440+
- `--custom-reward-post-process-path` -> `tests/plugin_contracts/test_plugin_custom_reward_post_process_contracts.py`
441+
- `--custom-convert-samples-to-train-data-path` -> `tests/plugin_contracts/test_plugin_custom_convert_samples_to_train_data_contracts.py`
442+
- `--rollout-sample-filter-path` -> `tests/plugin_contracts/test_plugin_rollout_sample_filter_contracts.py`
443+
- `--rollout-all-samples-process-path` -> `tests/plugin_contracts/test_plugin_rollout_all_samples_process_contracts.py`
444+
- `--rollout-data-postprocess-path` -> `tests/plugin_contracts/test_plugin_rollout_data_postprocess_contracts.py`
445+
446+
Run all customization contract tests locally:
447+
448+
```bash
449+
python -m pytest \
450+
tests/plugin_contracts/test_plugin_rollout_contracts.py \
451+
tests/plugin_contracts/test_plugin_eval_function_contracts.py \
452+
tests/plugin_contracts/test_plugin_generate_contracts.py \
453+
tests/plugin_contracts/test_plugin_custom_rm_contracts.py \
454+
tests/plugin_contracts/test_plugin_dynamic_filter_contracts.py \
455+
tests/plugin_contracts/test_plugin_buffer_filter_contracts.py \
456+
tests/plugin_contracts/test_plugin_data_source_contracts.py \
457+
tests/plugin_contracts/test_plugin_custom_rollout_log_contracts.py \
458+
tests/plugin_contracts/test_plugin_custom_eval_rollout_log_contracts.py \
459+
tests/plugin_contracts/test_plugin_custom_reward_post_process_contracts.py \
460+
tests/plugin_contracts/test_plugin_custom_convert_samples_to_train_data_contracts.py \
461+
tests/plugin_contracts/test_plugin_rollout_sample_filter_contracts.py \
462+
tests/plugin_contracts/test_plugin_rollout_all_samples_process_contracts.py \
463+
tests/plugin_contracts/test_plugin_rollout_data_postprocess_contracts.py
464+
```
465+
466+
Each test file can also be executed directly with `python tests/plugin_contracts/<file>.py`, which keeps them compatible with `run-ci-changed`.
467+
468+
For user-defined implementations, you can either export environment variables such as `SLIME_CONTRACT_ROLLOUT_FUNCTION_PATH` and `SLIME_CONTRACT_CUSTOM_RM_PATH`, or pass overrides directly when running a test file, for example:
469+
470+
```bash
471+
python tests/plugin_contracts/test_plugin_rollout_contracts.py \
472+
--rollout-function-path my_project.custom_rollout.generate_rollout
473+
```
474+
475+
To validate your own custom implementation, replace the plugin paths used in these tests with your module path and keep the same assertions on signatures, return structure, and side effects.

docs/zh/developer_guide/ci.md

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ slime 使用 GitHub Actions 进行 CI。测试通过 **PR label** 触发——
99
1. 在自托管 GPU runner 上以 Docker 容器(`slimerl/slime:latest`)运行。
1010
2. 通过 `pip install -e . --no-deps` 安装 slime。
1111
3. 通过 `tests/ci/gpu_lock_exec.py --count <num_gpus>` 获取所需数量的 GPU。
12-
4. 执行测试文件:`python tests/<test_file>.py`
12+
4. 执行测试文件:`python <test_path>.py` 或 `python tests/<test_file>.py`。如果测试位于 `tests/plugin_contracts/` 这样的子目录,CI 也会自动处理。
1313

1414
每个测试文件遵循统一的模式:`prepare()` 函数下载模型和数据集,`execute()` 函数构建命令行参数并调用 `U.execute_train(...)`
1515

@@ -35,7 +35,7 @@ slime 使用 GitHub Actions 进行 CI。测试通过 **PR label** 触发——
3535

3636
这是开发中最常用的 label。当你新增或修改了测试文件时,只需给 PR 添加 `run-ci-changed`,CI 会自动:
3737

38-
1. **检测**相对于 `origin/main` 新增或修改的 `tests/test_*.py` 文件(通过 `git diff --diff-filter=AM`)。
38+
1. **检测**相对于 `origin/main` 新增或修改的 `tests/test_*.py` 或 `tests/plugin_contracts/test_*.py` 文件(通过 `git diff --diff-filter=AM`)。
3939
2. **提取**每个测试文件中的 `NUM_GPUS` 值。
4040
3. **构建**动态 GitHub Actions matrix,并行运行每个测试。
4141

@@ -107,3 +107,27 @@ cd .github/workflows && python generate_github_workflows.py
107107
1. 编辑 `.github/workflows/pr-test.yml.j2`
108108
2. 运行 `python .github/workflows/generate_github_workflows.py`
109109
3. 同时提交两个文件。
110+
111+
## Customization 契约测试
112+
113+
如果你要运行通过函数路径加载的 customization hook 契约测试,可以使用:
114+
115+
```bash
116+
python -m pytest \
117+
tests/plugin_contracts/test_plugin_rollout_contracts.py \
118+
tests/plugin_contracts/test_plugin_eval_function_contracts.py \
119+
tests/plugin_contracts/test_plugin_generate_contracts.py \
120+
tests/plugin_contracts/test_plugin_custom_rm_contracts.py \
121+
tests/plugin_contracts/test_plugin_dynamic_filter_contracts.py \
122+
tests/plugin_contracts/test_plugin_buffer_filter_contracts.py \
123+
tests/plugin_contracts/test_plugin_data_source_contracts.py \
124+
tests/plugin_contracts/test_plugin_custom_rollout_log_contracts.py \
125+
tests/plugin_contracts/test_plugin_custom_eval_rollout_log_contracts.py \
126+
tests/plugin_contracts/test_plugin_custom_reward_post_process_contracts.py \
127+
tests/plugin_contracts/test_plugin_custom_convert_samples_to_train_data_contracts.py \
128+
tests/plugin_contracts/test_plugin_rollout_sample_filter_contracts.py \
129+
tests/plugin_contracts/test_plugin_rollout_all_samples_process_contracts.py \
130+
tests/plugin_contracts/test_plugin_rollout_data_postprocess_contracts.py
131+
```
132+
133+
这些测试文件也支持直接执行 `python tests/plugin_contracts/<file>.py`。它们声明了 `NUM_GPUS = 0`,因此可以被 `run-ci-changed` 自动识别,同时不会被当作 GPU 重型端到端测试。

0 commit comments

Comments
 (0)