3 files changed (+32, −7)

**examples/flash_rl/README.md** (new file)

# Flash RL Examples

## Installation

The `flashrl` extra does not automatically install vLLM due to PyPI package distribution constraints. You must install the custom vLLM wheel manually.

## Install custom vLLM

```bash
# Install the custom vLLM wheel
uv pip install https://github.com/NovaSky-AI/SkyRL/releases/download/skyrl_train-v0.1.0/vllm-0.1.dev7509+gcc487699a.d20250821-cp312-cp312-linux_x86_64.whl
```
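
To confirm the custom wheel is the one active in your environment, a quick sanity check (the expected version string is inferred from the wheel filename, so treat it as an assumption):

```bash
# Print the installed vLLM version; the custom build should report a dev version
# resembling 0.1.dev7509+gcc487699a... (assumption based on the wheel filename)
uv run python -c "import vllm; print(vllm.__version__)"
```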

## Running Examples

After installing the custom vLLM wheel, you can run the Flash RL examples:

```bash
# Example: DAPO with FlashRL using 0.5B model with FP8
bash examples/flash_rl/run_dapo_gsm8k_flashrl_0.5b_fp8.sh

# Example: DAPO with FlashRL using 0.5B model with INT8
bash examples/flash_rl/run_dapo_gsm8k_flashrl_0.5b_int8.sh

# Example: DAPO with FlashRL using 32B model with INT8
bash examples/flash_rl/run_dapo_gsm8k_flashrl_32b_int8.sh
```
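
Note that the custom wheel targets CPython 3.12 on Linux x86_64 (per its `cp312-cp312-linux_x86_64` filename tag), so the environment running these scripts needs a matching interpreter and platform:

```bash
# The custom vLLM wheel is built for CPython 3.12 on linux_x86_64 (per its filename)
python --version   # expect Python 3.12.x
uname -m           # expect x86_64
```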

**Example run script**

```diff
@@ -25,6 +25,7 @@ uv run --isolated --extra $INFERENCE_BACKEND -m skyrl_train.entrypoints.main_base
 trainer.placement.colocate_all=true \
 trainer.strategy=fsdp2 \
 trainer.placement.policy_num_gpus_per_node=$NUM_GPUS \
+trainer.placement.critic_num_gpus_per_node=$NUM_GPUS \
 trainer.placement.ref_num_gpus_per_node=$NUM_GPUS \
 generator.num_inference_engines=$NUM_GPUS \
 generator.inference_engine_tensor_parallel_size=1 \
```
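
With `trainer.placement.colocate_all=true`, policy, critic, and ref share the same GPUs, so the critic now gets the same `num_gpus_per_node` as the other roles. For reference, a minimal sketch of the launch pattern these overrides belong to (`$NUM_GPUS` and `$INFERENCE_BACKEND` are set elsewhere in the script; the values below are placeholder assumptions):

```bash
# Sketch of the example launch pattern; values are placeholders, not the script's
NUM_GPUS=4
INFERENCE_BACKEND=vllm

uv run --isolated --extra $INFERENCE_BACKEND -m skyrl_train.entrypoints.main_base \
  trainer.placement.colocate_all=true \
  trainer.strategy=fsdp2 \
  trainer.placement.policy_num_gpus_per_node=$NUM_GPUS \
  trainer.placement.critic_num_gpus_per_node=$NUM_GPUS \
  trainer.placement.ref_num_gpus_per_node=$NUM_GPUS \
  generator.num_inference_engines=$NUM_GPUS \
  generator.inference_engine_tensor_parallel_size=1
```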

**pyproject.toml**

```diff
@@ -40,6 +40,8 @@ dependencies = [
     "flash-attn",
     "polars",
     "s3fs",
+    "fastapi",
+    "uvicorn",
 ]
 
 [tool.uv]
@@ -76,9 +78,6 @@ flashinfer-python = [
     { url = "https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.6.post1%2Bcu128torch2.7-cp39-abi3-linux_x86_64.whl", marker = "extra == 'vllm'" },
     { url = "https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.6.post1%2Bcu128torch2.7-cp39-abi3-linux_x86_64.whl", marker = "extra == 'sglang' and extra != 'vllm'" }
 ]
-# Custom vllm wheel for flashrl extra - built from commit 4b04dfc at https://github.com/SumanthRH/vllm/tree/flashrl
-flashrl-vllm = { url = "https://github.com/NovaSky-AI/SkyRL/releases/download/skyrl_train-v0.1.0/vllm-0.1.dev7509+gcc487699a.d20250821-cp312-cp312-linux_x86_64.whl" }
-
 
 [project.optional-dependencies]
 deepspeed = [
@@ -90,8 +89,6 @@ dev = [
     "pytest>=6.2.5",
     "pytest-asyncio",
     "pre-commit",
-    "fastapi",
-    "uvicorn",
     "litellm",
 ]
 docs = [
@@ -133,8 +130,8 @@ mcore = [
     "megatron-core==0.13.0",
 ]
 flashrl = [
-    # NOTE: Flashrl requires a custom vLLM wheel.
-    "flashrl-vllm",
+    # NOTE: Custom vLLM wheel must be installed separately.
+    # See examples/flash_rl/README.md for installation instructions.
     "torch==2.7.0",
     "flashinfer-python",
     "torchvision",
```
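
Since the `flashrl` extra no longer pins the custom wheel, setting up a working environment becomes a two-step process; a sketch, assuming a uv-managed checkout of the repository:

```bash
# Step 1: install the flashrl extra (torch, flashinfer-python, torchvision, ...)
uv sync --extra flashrl

# Step 2: overlay the custom vLLM wheel on top of the synced environment
uv pip install https://github.com/NovaSky-AI/SkyRL/releases/download/skyrl_train-v0.1.0/vllm-0.1.dev7509+gcc487699a.d20250821-cp312-cp312-linux_x86_64.whl
```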