Skip to content

Commit 1ed499c

Browse files
authored
[fix] Resolve issues in toml file (#472)
1 parent 35ce808 commit 1ed499c

File tree

3 files changed

+32
-7
lines changed

3 files changed

+32
-7
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Flash RL Examples
2+
3+
## Installation
4+
5+
The `flashrl` extra does not automatically install vLLM due to PyPI package distribution constraints. You must install the custom vLLM wheel manually.
6+
7+
## Install custom vLLM
8+
9+
```bash
10+
# Install the custom vLLM wheel
11+
uv pip install https://github.com/NovaSky-AI/SkyRL/releases/download/skyrl_train-v0.1.0/vllm-0.1.dev7509+gcc487699a.d20250821-cp312-cp312-linux_x86_64.whl
12+
```
13+
14+
## Running Examples
15+
16+
After installing the custom vLLM wheel, you can run the Flash RL examples:
17+
18+
```bash
19+
# Example: DAPO with FlashRL using 0.5B model with FP8
20+
bash examples/flash_rl/run_dapo_gsm8k_flashrl_0.5b_fp8.sh
21+
22+
# Example: DAPO with FlashRL using 0.5B model with INT8
23+
bash examples/flash_rl/run_dapo_gsm8k_flashrl_0.5b_int8.sh
24+
25+
# Example: DAPO with FlashRL using 32B model with INT8
26+
bash examples/flash_rl/run_dapo_gsm8k_flashrl_32b_int8.sh
27+
```

skyrl-train/examples/gsm8k/run_gsm8k.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ uv run --isolated --extra $INFERENCE_BACKEND -m skyrl_train.entrypoints.main_bas
2525
trainer.placement.colocate_all=true \
2626
trainer.strategy=fsdp2 \
2727
trainer.placement.policy_num_gpus_per_node=$NUM_GPUS \
28+
trainer.placement.critic_num_gpus_per_node=$NUM_GPUS \
2829
trainer.placement.ref_num_gpus_per_node=$NUM_GPUS \
2930
generator.num_inference_engines=$NUM_GPUS \
3031
generator.inference_engine_tensor_parallel_size=1 \

skyrl-train/pyproject.toml

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ dependencies = [
4040
"flash-attn",
4141
"polars",
4242
"s3fs",
43+
"fastapi",
44+
"uvicorn",
4345
]
4446

4547
[tool.uv]
@@ -76,9 +78,6 @@ flashinfer-python = [
7678
{ url = "https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.6.post1%2Bcu128torch2.7-cp39-abi3-linux_x86_64.whl", marker = "extra =='vllm'" },
7779
{ url = "https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.6.post1%2Bcu128torch2.7-cp39-abi3-linux_x86_64.whl", marker = "extra == 'sglang' and extra != 'vllm'" }
7880
]
79-
# Custom vllm wheel for flashrl extra - built from commit 4b04dfc at https://github.com/SumanthRH/vllm/tree/flashrl
80-
flashrl-vllm = { url = "https://github.com/NovaSky-AI/SkyRL/releases/download/skyrl_train-v0.1.0/vllm-0.1.dev7509+gcc487699a.d20250821-cp312-cp312-linux_x86_64.whl" }
81-
8281

8382
[project.optional-dependencies]
8483
deepspeed = [
@@ -90,8 +89,6 @@ dev = [
9089
"pytest>=6.2.5",
9190
"pytest-asyncio",
9291
"pre-commit",
93-
"fastapi",
94-
"uvicorn",
9592
"litellm",
9693
]
9794
docs = [
@@ -133,8 +130,8 @@ mcore = [
133130
"megatron-core==0.13.0",
134131
]
135132
flashrl = [
136-
# NOTE: Flashrl requires a custom vLLM wheel.
137-
"flashrl-vllm",
133+
# NOTE: Custom vLLM wheel must be installed separately.
134+
# See examples/flash_rl/README.md for installation instructions.
138135
"torch==2.7.0",
139136
"flashinfer-python",
140137
"torchvision",

0 commit comments

Comments (0)