3 files changed (+32, −7)

**examples/flash_rl/README.md** (new file)

# Flash RL Examples

## Installation

The `flashrl` extra does not automatically install vLLM due to PyPI package distribution constraints. You must install the custom vLLM wheel manually.

## Install custom vLLM

```bash
# Install the custom vLLM wheel
uv pip install https://github.com/NovaSky-AI/SkyRL/releases/download/skyrl_train-v0.1.0/vllm-0.1.dev7509+gcc487699a.d20250821-cp312-cp312-linux_x86_64.whl
```
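
To confirm the custom wheel is the one active in your environment, a quick sanity check (the expected version string is inferred from the wheel filename, so treat it as an assumption):

```bash
# Print the installed vLLM version; the custom build should report a dev version
# resembling 0.1.dev7509+gcc487699a... (assumption based on the wheel filename)
uv run python -c "import vllm; print(vllm.__version__)"
```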

## Running Examples

After installing the custom vLLM wheel, you can run the Flash RL examples:

```bash
# Example: DAPO with FlashRL using 0.5B model with FP8
bash examples/flash_rl/run_dapo_gsm8k_flashrl_0.5b_fp8.sh

# Example: DAPO with FlashRL using 0.5B model with INT8
bash examples/flash_rl/run_dapo_gsm8k_flashrl_0.5b_int8.sh

# Example: DAPO with FlashRL using 32B model with INT8
bash examples/flash_rl/run_dapo_gsm8k_flashrl_32b_int8.sh
```
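
Note that the custom wheel targets CPython 3.12 on Linux x86_64 (per its `cp312-cp312-linux_x86_64` filename tag), so the environment running these scripts needs a matching interpreter and platform:

```bash
# The custom vLLM wheel is built for CPython 3.12 on linux_x86_64 (per its filename)
python --version   # expect Python 3.12.x
uname -m           # expect x86_64
```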

**Example run script**

```diff
@@ -25,6 +25,7 @@ uv run --isolated --extra $INFERENCE_BACKEND -m skyrl_train.entrypoints.main_base
 trainer.placement.colocate_all=true \
 trainer.strategy=fsdp2 \
 trainer.placement.policy_num_gpus_per_node=$NUM_GPUS \
+trainer.placement.critic_num_gpus_per_node=$NUM_GPUS \
 trainer.placement.ref_num_gpus_per_node=$NUM_GPUS \
 generator.num_inference_engines=$NUM_GPUS \
 generator.inference_engine_tensor_parallel_size=1 \
```
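
With `trainer.placement.colocate_all=true`, policy, critic, and ref share the same GPUs, so the critic now gets the same `num_gpus_per_node` as the other roles. For reference, a minimal sketch of the launch pattern these overrides belong to (`$NUM_GPUS` and `$INFERENCE_BACKEND` are set elsewhere in the script; the values below are placeholder assumptions):

```bash
# Sketch of the example launch pattern; values are placeholders, not the script's
NUM_GPUS=4
INFERENCE_BACKEND=vllm

uv run --isolated --extra $INFERENCE_BACKEND -m skyrl_train.entrypoints.main_base \
  trainer.placement.colocate_all=true \
  trainer.strategy=fsdp2 \
  trainer.placement.policy_num_gpus_per_node=$NUM_GPUS \
  trainer.placement.critic_num_gpus_per_node=$NUM_GPUS \
  trainer.placement.ref_num_gpus_per_node=$NUM_GPUS \
  generator.num_inference_engines=$NUM_GPUS \
  generator.inference_engine_tensor_parallel_size=1
```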

**pyproject.toml**

```diff
@@ -40,6 +40,8 @@ dependencies = [
     "flash-attn",
     "polars",
     "s3fs",
+    "fastapi",
+    "uvicorn",
 ]
 
 [tool.uv]
@@ -76,9 +78,6 @@ flashinfer-python = [
     { url = "https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.6.post1%2Bcu128torch2.7-cp39-abi3-linux_x86_64.whl", marker = "extra == 'vllm'" },
     { url = "https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.6.post1%2Bcu128torch2.7-cp39-abi3-linux_x86_64.whl", marker = "extra == 'sglang' and extra != 'vllm'" }
 ]
-# Custom vllm wheel for flashrl extra - built from commit 4b04dfc at https://github.com/SumanthRH/vllm/tree/flashrl
-flashrl-vllm = { url = "https://github.com/NovaSky-AI/SkyRL/releases/download/skyrl_train-v0.1.0/vllm-0.1.dev7509+gcc487699a.d20250821-cp312-cp312-linux_x86_64.whl" }
-
 
 [project.optional-dependencies]
 deepspeed = [
@@ -90,8 +89,6 @@ dev = [
     "pytest>=6.2.5",
     "pytest-asyncio",
     "pre-commit",
-    "fastapi",
-    "uvicorn",
     "litellm",
 ]
 docs = [
@@ -133,8 +130,8 @@ mcore = [
     "megatron-core==0.13.0",
 ]
 flashrl = [
-    # NOTE: Flashrl requires a custom vLLM wheel.
-    "flashrl-vllm",
+    # NOTE: Custom vLLM wheel must be installed separately.
+    # See examples/flash_rl/README.md for installation instructions.
     "torch==2.7.0",
     "flashinfer-python",
     "torchvision",
```
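
Since the `flashrl` extra no longer pins the custom wheel, setting up a working environment becomes a two-step process; a sketch, assuming a uv-managed checkout of the repository:

```bash
# Step 1: install the flashrl extra (torch, flashinfer-python, torchvision, ...)
uv sync --extra flashrl

# Step 2: overlay the custom vLLM wheel on top of the synced environment
uv pip install https://github.com/NovaSky-AI/SkyRL/releases/download/skyrl_train-v0.1.0/vllm-0.1.dev7509+gcc487699a.d20250821-cp312-cp312-linux_x86_64.whl
```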