Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
93 changes: 92 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,95 @@ tensorboard_log/
*.db

# uv lock files
uv.lock
uv.lock

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
uv.lock

# PyInstaller
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Jupyter Notebook
.ipynb_checkpoints

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# MkDocs build output
site/

# IDEs and editors
.idea/
.vscode/

# OS generated files
.DS_Store
Thumbs.db

# Hydra outputs
outputs/

# Local artifacts
tinker.db
uv.lock

# Alembic - don't track pycache
tx/tinker/alembic/__pycache__/

# SQLite databases (tracked in git by default, but ignore if created locally)
*.db
Comment on lines +79 to +137
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

There are a few duplicate entries in the newly added lines. uv.lock is added on lines 79 and 131, and *.db is added on line 137. These entries seem to be duplicates of existing entries in the file. Please remove the redundant lines to keep the .gitignore file clean and maintainable.

*.db-journal
*.db-wal
*.db-shm
File renamed without changes.
9 changes: 9 additions & 0 deletions ci/anyscale_gpu_ci_skyrl_train.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# CI job definition (presumably an Anyscale job spec, per the ci/anyscale_* filename
# and cloud/compute_config fields — confirm against the submitting tool).
name: skyrl-train-gpu-ci
# Command executed inside the job; runs the GPU test driver script from this repo.
entrypoint: bash ci/gpu_ci_run_skyrl_train.sh
# Container image providing Ray 2.51.1, Python 3.12, and CUDA 12.8.
image_uri: novaskyai/skyrl-train-ray-2.51.1-py3.12-cu12.8
# Target cloud/region identifier for the CI cluster.
cloud: sky-anyscale-aws-us-east-1
# Must match the Ray version baked into image_uri above.
ray_version: "2.51.1"
# Named compute configuration (L4 GPU CI pool).
compute_config: l4_ci
# Upload the repository root as the job's working directory.
working_dir: .
# Fail fast: do not retry the job on failure.
max_retries: 0

9 changes: 9 additions & 0 deletions ci/anyscale_gpu_ci_skyrl_train_megatron.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# CI job definition for the Megatron backend variant (presumably an Anyscale job
# spec, per the ci/anyscale_* filename and cloud/compute_config fields — confirm
# against the submitting tool). Mirrors ci/anyscale_gpu_ci_skyrl_train.yaml but
# uses the megatron image and entrypoint.
name: skyrl-train-gpu-ci-megatron
# Command executed inside the job; runs only the megatron-marked GPU tests.
entrypoint: bash ci/gpu_ci_run_skyrl_train_megatron.sh
# Container image with Ray 2.51.1, Python 3.12, CUDA 12.8, plus Megatron deps.
image_uri: novaskyai/skyrl-train-ray-2.51.1-py3.12-cu12.8-megatron
# Target cloud/region identifier for the CI cluster.
cloud: sky-anyscale-aws-us-east-1
# Must match the Ray version baked into image_uri above.
ray_version: "2.51.1"
# Named compute configuration (L4 GPU CI pool).
compute_config: l4_ci
# Upload the repository root as the job's working directory.
working_dir: .
# Fail fast: do not retry the job on failure.
max_retries: 0

2 changes: 1 addition & 1 deletion skyrl-tx/ci/gpu_ci_run.sh → ci/gpu_ci_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ set -xeuo pipefail
export CI=true

# Run GPU-specific tests
uv run --extra gpu --extra tinker --extra dev pytest tests/gpu
uv run --extra gpu --extra tinker --extra dev pytest tests/tx/gpu
38 changes: 38 additions & 0 deletions ci/gpu_ci_run_skyrl_train.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env bash
# GPU CI driver for skyrl-train.
#
# Steps:
#   1. Prepare the GSM8K and SearchR1 datasets used by the test suites.
#   2. Run all non-megatron / non-sglang GPU tests with the fsdp extra.
#   3. Re-run a pinned vllm 0.9.2 compatibility test with overridden deps.
#   4. Run the new inference-layer tests behind _SKYRL_USE_NEW_INFERENCE=1.
#
# Fails fast on any error (-e), unset variable (-u), or pipeline failure
# (pipefail); -x traces every command for CI logs.
set -xeuo pipefail

export CI=true

# Prepare datasets used in tests. Quote "$HOME" so paths survive word
# splitting/globbing if HOME ever contains spaces or metacharacters.
uv run examples/train/gsm8k/gsm8k_dataset.py --output_dir "$HOME/data/gsm8k"
uv run examples/train/search/searchr1_dataset.py --local_dir "$HOME/data/searchR1" --split test

# Run all non-megatron and non-sglang tests
uv run --directory . --isolated --extra dev --extra fsdp pytest -s tests/backends/skyrl_train/gpu/gpu_ci -m "not (sglang or integrations or megatron)"

## TODO: enable integrations and potentially migrate sglang
# # Run tests for "integrations" folder
# if add_integrations=$(uv add --active wordle --index https://hub.primeintellect.ai/will/simple/ 2>&1); then
#     echo "Running integration tests"
#     uv run --isolated --with verifiers@git+https://github.com/PrimeIntellect-ai/verifiers.git@15f68 -- python integrations/verifiers/prepare_dataset.py --env_id will/wordle
#     uv run --directory . --isolated --extra dev --extra vllm --with verifiers@git+https://github.com/PrimeIntellect-ai/verifiers.git@15f68 pytest -s tests/gpu/gpu_ci/ -m "integrations"
# else
#     echo "Skipping integrations tests. Failed to execute uv add command"
#     echo "$add_integrations"
# fi

# # Run all SGLang tests
# uv run --directory . --isolated --extra dev --extra sglang pytest -s tests/gpu/gpu_ci -m "sglang"


# Run tests for vllm 0.9.2
# TODO (sumanthrh): We should have a better way to override without pinning a flash-attn wheel
uv run --isolated --extra fsdp --extra dev \
    --with vllm==0.9.2 \
    --with transformers==4.53.0 \
    --with torch==2.7.0 \
    --with "flash-attn@https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.7cxx11abiTRUE-cp312-cp312-linux_x86_64.whl" \
    -- pytest -s -vvv tests/backends/skyrl_train/gpu/gpu_ci/test_engine_generation.py::test_token_based_generation -m "vllm"

# Run tests for new inference layer (opt-in via env flag for these two suites).
_SKYRL_USE_NEW_INFERENCE=1 uv run --isolated --extra dev --extra fsdp pytest -s tests/backends/skyrl_train/gpu/gpu_ci/test_policy_local_engines_e2e.py -m "vllm"
_SKYRL_USE_NEW_INFERENCE=1 uv run --isolated --extra dev --extra fsdp pytest -s tests/backends/skyrl_train/gpu/gpu_ci/test_engine_generation.py -m "vllm"
9 changes: 9 additions & 0 deletions ci/gpu_ci_run_skyrl_train_megatron.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env bash
# GPU CI driver for the skyrl-train Megatron backend: prepares the GSM8K
# dataset, then runs only the tests marked "megatron".
#
# Fails fast on any error (-e), unset variable (-u), or pipeline failure
# (pipefail); -x traces every command for CI logs.
set -xeuo pipefail

export CI=true

# Prepare datasets used in tests. Quote "$HOME" so the path survives word
# splitting/globbing if HOME ever contains spaces or metacharacters.
uv run examples/train/gsm8k/gsm8k_dataset.py --output_dir "$HOME/data/gsm8k"

# Run all megatron tests
uv run --directory . --isolated --extra dev --extra megatron pytest -s tests/backends/skyrl_train/gpu/gpu_ci -m "megatron"

File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ Fully asynchronous (PipelineRL / AReal style) GRPO for Qwen2.5-1.5B-Instruct on

```bash
# prepare the dataset
uv run -- python examples/gsm8k/gsm8k_dataset.py --output_dir $HOME/data/gsm8k
uv run -- python examples/train/gsm8k/gsm8k_dataset.py --output_dir $HOME/data/gsm8k

export WANDB_API_KEY=<your_key_here>

bash examples/fully_async/fully_async_run_gsm8k.sh
bash examples/train/fully_async/fully_async_run_gsm8k.sh
```

For more details, refer to the documentation: https://docs.skyrl.ai/docs/tutorials/fully_async
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,18 @@ set -x

# Fully async GRPO training+generation for Qwen2.5-1.5B-Instruct on GSM8K.
# This bash script is copied from examples/async/async_run_gsm8k.sh, except for:
# - running examples.fully_async.main_fully_async
# - running examples.train.fully_async.main_fully_async
# - setting the generator.batched=false.
# - colocate_all=false
# - the various generator configs at the end (http, chat template, etc.)

# uv run examples/gsm8k/gsm8k_dataset.py --output_dir $HOME/data/gsm8k
# uv run examples/train/gsm8k/gsm8k_dataset.py --output_dir $HOME/data/gsm8k
# export WANDB_API_KEY=<your_key_here>
# bash examples/fully_async/fully_async_run_gsm8k.sh
# bash examples/train/fully_async/fully_async_run_gsm8k.sh

# NOTE (sumanthrh): `micro_train_batch_size_per_gpu` and `micro_forward_batch_size_per_gpu` can be tuned

# You can override the default values with e.g.: `NUM_GPUS=1 bash examples/fully_async/fully_async_run_gsm8k.sh`.
# You can override the default values with e.g.: `NUM_GPUS=1 bash examples/train/fully_async/fully_async_run_gsm8k.sh`.

: "${DATA_DIR:="$HOME/data/gsm8k"}"
: "${NUM_INFERENCE_GPUS:=2}"
Expand All @@ -31,9 +31,9 @@ set -x
TIS_TYPE=token
TIS_IMP_RATIO_CAP=2.0

RUN_NAME=gsm8k-fully-async-qwen2.5_1.5B-useTIS_${USE_TIS}-maxStale${MAX_STALENESS_STEPS}-numCon${NUM_PARALLEL_GENERATION_WORKERS}-${NUM_POLICY_GPUS}train${NUM_INFERENCE_GPUS}gen
RUN_NAME=gsm8k-fully-async-qwen2.5_1.5B-useTIS_${TIS_TYPE}-maxStale${MAX_STALENESS_STEPS}-numCon${NUM_PARALLEL_GENERATION_WORKERS}-${NUM_POLICY_GPUS}train${NUM_INFERENCE_GPUS}gen

uv run --isolated --extra $INFERENCE_BACKEND -m examples.fully_async.main_fully_async \
uv run --isolated --extra fsdp -m examples.train.fully_async.main_fully_async \
data.train_data="['$DATA_DIR/train.parquet']" \
data.val_data="['$DATA_DIR/validation.parquet']" \
trainer.fully_async.max_staleness_steps=${MAX_STALENESS_STEPS} \
Expand Down Expand Up @@ -76,6 +76,5 @@ uv run --isolated --extra $INFERENCE_BACKEND -m examples.fully_async.main_fully_
trainer.run_name=${RUN_NAME} \
trainer.resume_mode=latest \
trainer.ckpt_path="$HOME/ckpts/${RUN_NAME}" \
trainer.resume_mode=latest \
generator.enforce_eager=true \
$@
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

import hydra
from omegaconf import DictConfig
from skyrl_train.entrypoints.main_base import BasePPOExp, config_dir, validate_cfg
from skyrl_train.fully_async_trainer import FullyAsyncRayPPOTrainer
from skyrl.train.entrypoints.main_base import BasePPOExp, config_dir, validate_cfg
from skyrl.train.fully_async_trainer import FullyAsyncRayPPOTrainer
import asyncio
from skyrl_train.utils import initialize_ray
from skyrl.train.utils import initialize_ray
import ray


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ set -x

# Colocated GRPO training+generation for Qwen2.5-1.5B-Instruct on GSM8K.

# uv run examples/gsm8k/gsm8k_dataset.py --output_dir $HOME/data/gsm8k
# uv run examples/train/gsm8k/gsm8k_dataset.py --output_dir $HOME/data/gsm8k
# export WANDB_API_KEY=<your_key_here>
# bash examples/gsm8k/run_gsm8k.sh
# bash examples/train/gsm8k/run_gsm8k.sh

# NOTE (sumanthrh): `micro_train_batch_size_per_gpu` and `micro_forward_batch_size_per_gpu` can be tuned

# You can override the default values with e.g.: `NUM_GPUS=1 bash examples/gsm8k/run_gsm8k.sh`.
# You can override the default values with e.g.: `NUM_GPUS=1 bash examples/train/gsm8k/run_gsm8k.sh`.

: "${DATA_DIR:="$HOME/data/gsm8k"}"
: "${NUM_GPUS:=4}"
Expand All @@ -17,7 +17,7 @@ set -x
: "${INFERENCE_BACKEND:=vllm}"
# : "${INFERENCE_BACKEND:=sglang}"

uv run --isolated --extra $INFERENCE_BACKEND -m skyrl_train.entrypoints.main_base \
uv run --isolated --extra fsdp -m skyrl.train.entrypoints.main_base \
data.train_data="['$DATA_DIR/train.parquet']" \
data.val_data="['$DATA_DIR/validation.parquet']" \
trainer.algorithm.advantage_estimator="grpo" \
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ set -x

# Colocated GRPO training+generation for Qwen3-0.6B on GSM8K with Megatron.

# uv run examples/gsm8k/gsm8k_dataset.py --output_dir $HOME/data/gsm8k
# uv run examples/train/gsm8k/gsm8k_dataset.py --output_dir $HOME/data/gsm8k
# export WANDB_API_KEY=<your_key_here>
# bash examples/megatron/run_megatron.sh
# bash examples/train/megatron/run_megatron.sh

DATA_DIR="$HOME/data/gsm8k"
NUM_GPUS=4
Expand All @@ -22,7 +22,7 @@ ENABLE_TORCH_PROFILER=false
RANKS_TO_PROFILE="[0]"
SAVE_PATH="$HOME/megatron_prof/tp${MEGATRON_TP}_pp${MEGATRON_PP}_cp${MEGATRON_CP}_${MODEL_NAME}"

uv run --isolated --extra mcore -m skyrl_train.entrypoints.main_base \
uv run --isolated --extra megatron -m skyrl.train.entrypoints.main_base \
data.train_data="['$DATA_DIR/train.parquet']" \
data.val_data="['$DATA_DIR/validation.parquet']" \
trainer.algorithm.advantage_estimator="grpo" \
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Additional Reference: [Verl+Sglang Instructions](https://github.com/zhaochenyang
## Prepare Datasets
```bash
local_dir=~/data/searchR1
uv run --isolated examples/search/searchr1_dataset.py --local_dir $local_dir
uv run --isolated examples/train/search/searchr1_dataset.py --local_dir $local_dir
```

# Start the Search Engine
Expand Down Expand Up @@ -38,7 +38,7 @@ pip install uvicorn fastapi
conda activate retriever

local_dir=~/data/searchR1
python examples/search/searchr1_download.py --local_dir $local_dir
python examples/train/search/searchr1_download.py --local_dir $local_dir
cat $local_dir/part_* > $local_dir/e5_Flat.index
gzip -d $local_dir/wiki-18.jsonl.gz
```
Expand All @@ -49,13 +49,13 @@ conda activate retriever

# redirect the output to a file to avoid cluttering the terminal
# we have observed outputting to the terminal causing spikes in server response times
bash examples/search/retriever/retrieval_launch.sh > retrieval_server.log
bash examples/train/search/retriever/retrieval_launch.sh > retrieval_server.log
```

## Launch your Training Job
Now from your base environment, you can launch your training run (which will use uv to package dependencies, separately from the retriever environment).

```bash
export WANDB_API_KEY=your_wandb_api_key
bash examples/search/run_search.sh
bash examples/train/search/run_search.sh
```
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ corpus_file=$save_path/wiki-18.jsonl
retriever_name=e5
retriever_path=intfloat/e5-base-v2

python examples/search/retriever/retrieval_server.py \
python examples/train/search/retriever/retrieval_server.py \
--index_path $index_file \
--corpus_path $corpus_file \
--topk 3 \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set -x
# follow the instructions in examples/search/README.md for setting up the dataset
# and for starting the local search server
# export WANDB_API_KEY=<your_key_here>
# bash examples/search/run_search.sh
# bash examples/train/search/run_search.sh

# path for dataset (.parquet files) containing the prompts and metadata for each question
DATA_DIR="$HOME/data/searchR1"
Expand All @@ -14,7 +14,7 @@ RUN_NAME="skyrl-search_4turns_maxgeneratelen_500-multiturn-sync-TIS_2.0"
TIS_TYPE=token
TIS_IMP_RATIO_CAP=2.0

uv run --isolated --frozen --extra vllm -m skyrl_train.entrypoints.main_base \
uv run --isolated --frozen --extra fsdp -m skyrl.train.entrypoints.main_base \
data.train_data="['${DATA_DIR}/train.parquet']" \
data.val_data="['${DATA_DIR}/validation.parquet']" \
trainer.algorithm.advantage_estimator="grpo" \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ set -x
# the search result for each turn).

# Colocated GRPO training+generation for Qwen2.5-Coder-3B-Instruct on SearchR1 data.
# follow the instructions in examples/search/README.md for setting up the dataset
# follow the instructions in examples/train/search/README.md for setting up the dataset
# and for starting the local search server
# export WANDB_API_KEY=<your_key_here>
# bash examples/search/run_search_conversation_format.sh
# bash examples/train/search/run_search_conversation_format.sh

# path for dataset (.parquet files) containing the prompts and metadata for each question
DATA_DIR="$HOME/data/searchR1"
Expand All @@ -21,7 +21,7 @@ RUN_NAME="skyrl-search_4turns_maxgeneratelen_500"
TIS_TYPE=token
TIS_IMP_RATIO_CAP=2.0

uv run --isolated --frozen --extra vllm -m skyrl_train.entrypoints.main_base \
uv run --isolated --frozen --extra fsdp -m skyrl.train.entrypoints.main_base \
data.train_data="['${DATA_DIR}/train.parquet']" \
data.val_data="['${DATA_DIR}/validation.parquet']" \
trainer.algorithm.advantage_estimator="grpo" \
Expand Down
File renamed without changes.
Loading
Loading