Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
93 changes: 92 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,95 @@ tensorboard_log/
*.db

# uv lock files
uv.lock
uv.lock

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
uv.lock

# PyInstaller
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Jupyter Notebook
.ipynb_checkpoints

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# MkDocs build output
site/

# IDEs and editors
.idea/
.vscode/

# OS generated files
.DS_Store
Thumbs.db

# Hydra outputs
outputs/

# Local artifacts
tinker.db
uv.lock

# Alembic - don't track pycache
tx/tinker/alembic/__pycache__/

# SQLite databases (tracked in git by default, but ignore if created locally)
*.db
Comment on lines +79 to +137
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

There are a few duplicate entries in the newly added lines. uv.lock is added on lines 79 and 131, and *.db is added on line 137. These entries seem to be duplicates of existing entries in the file. Please remove the redundant lines to keep the .gitignore file clean and maintainable.

*.db-journal
*.db-wal
*.db-shm
File renamed without changes.
9 changes: 9 additions & 0 deletions ci/anyscale_gpu_ci_skyrl_train.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# CI job definition (presumably an Anyscale job spec, per the ci/anyscale_* filename
# and cloud/compute_config fields — confirm against the submitting tool).
name: skyrl-train-gpu-ci
# Command executed inside the job; runs the GPU test driver script from this repo.
entrypoint: bash ci/gpu_ci_run_skyrl_train.sh
# Container image providing Ray 2.51.1, Python 3.12, and CUDA 12.8.
image_uri: novaskyai/skyrl-train-ray-2.51.1-py3.12-cu12.8
# Target cloud/region identifier for the CI cluster.
cloud: sky-anyscale-aws-us-east-1
# Must match the Ray version baked into image_uri above.
ray_version: "2.51.1"
# Named compute configuration (L4 GPU CI pool).
compute_config: l4_ci
# Upload the repository root as the job's working directory.
working_dir: .
# Fail fast: do not retry the job on failure.
max_retries: 0

9 changes: 9 additions & 0 deletions ci/anyscale_gpu_ci_skyrl_train_megatron.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# CI job definition for the Megatron backend variant (presumably an Anyscale job
# spec, per the ci/anyscale_* filename and cloud/compute_config fields — confirm
# against the submitting tool). Mirrors ci/anyscale_gpu_ci_skyrl_train.yaml but
# uses the megatron image and entrypoint.
name: skyrl-train-gpu-ci-megatron
# Command executed inside the job; runs only the megatron-marked GPU tests.
entrypoint: bash ci/gpu_ci_run_skyrl_train_megatron.sh
# Container image with Ray 2.51.1, Python 3.12, CUDA 12.8, plus Megatron deps.
image_uri: novaskyai/skyrl-train-ray-2.51.1-py3.12-cu12.8-megatron
# Target cloud/region identifier for the CI cluster.
cloud: sky-anyscale-aws-us-east-1
# Must match the Ray version baked into image_uri above.
ray_version: "2.51.1"
# Named compute configuration (L4 GPU CI pool).
compute_config: l4_ci
# Upload the repository root as the job's working directory.
working_dir: .
# Fail fast: do not retry the job on failure.
max_retries: 0

2 changes: 1 addition & 1 deletion skyrl-tx/ci/gpu_ci_run.sh → ci/gpu_ci_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ set -xeuo pipefail
export CI=true

# Run GPU-specific tests
uv run --extra gpu --extra tinker --extra dev pytest tests/gpu
uv run --extra gpu --extra tinker --extra dev pytest tests/tx/gpu
38 changes: 38 additions & 0 deletions ci/gpu_ci_run_skyrl_train.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env bash
# GPU CI driver for skyrl-train.
#
# Steps:
#   1. Prepare the GSM8K and SearchR1 datasets used by the test suites.
#   2. Run all non-megatron / non-sglang GPU tests with the fsdp extra.
#   3. Re-run a pinned vllm 0.9.2 compatibility test with overridden deps.
#   4. Run the new inference-layer tests behind _SKYRL_USE_NEW_INFERENCE=1.
#
# Fails fast on any error (-e), unset variable (-u), or pipeline failure
# (pipefail); -x traces every command for CI logs.
set -xeuo pipefail

export CI=true

# Prepare datasets used in tests. Quote "$HOME" so paths survive word
# splitting/globbing if HOME ever contains spaces or metacharacters.
uv run examples/train/gsm8k/gsm8k_dataset.py --output_dir "$HOME/data/gsm8k"
uv run examples/train/search/searchr1_dataset.py --local_dir "$HOME/data/searchR1" --split test

# Run all non-megatron and non-sglang tests
uv run --directory . --isolated --extra dev --extra fsdp pytest -s tests/backends/skyrl_train/gpu/gpu_ci -m "not (sglang or integrations or megatron)"

## TODO: enable integrations and potentially migrate sglang
# # Run tests for "integrations" folder
# if add_integrations=$(uv add --active wordle --index https://hub.primeintellect.ai/will/simple/ 2>&1); then
#     echo "Running integration tests"
#     uv run --isolated --with verifiers@git+https://github.com/PrimeIntellect-ai/verifiers.git@15f68 -- python integrations/verifiers/prepare_dataset.py --env_id will/wordle
#     uv run --directory . --isolated --extra dev --extra vllm --with verifiers@git+https://github.com/PrimeIntellect-ai/verifiers.git@15f68 pytest -s tests/gpu/gpu_ci/ -m "integrations"
# else
#     echo "Skipping integrations tests. Failed to execute uv add command"
#     echo "$add_integrations"
# fi

# # Run all SGLang tests
# uv run --directory . --isolated --extra dev --extra sglang pytest -s tests/gpu/gpu_ci -m "sglang"


# Run tests for vllm 0.9.2
# TODO (sumanthrh): We should have a better way to override without pinning a flash-attn wheel
uv run --isolated --extra fsdp --extra dev \
    --with vllm==0.9.2 \
    --with transformers==4.53.0 \
    --with torch==2.7.0 \
    --with "flash-attn@https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.7cxx11abiTRUE-cp312-cp312-linux_x86_64.whl" \
    -- pytest -s -vvv tests/backends/skyrl_train/gpu/gpu_ci/test_engine_generation.py::test_token_based_generation -m "vllm"

# Run tests for new inference layer (opt-in via env flag for these two suites).
_SKYRL_USE_NEW_INFERENCE=1 uv run --isolated --extra dev --extra fsdp pytest -s tests/backends/skyrl_train/gpu/gpu_ci/test_policy_local_engines_e2e.py -m "vllm"
_SKYRL_USE_NEW_INFERENCE=1 uv run --isolated --extra dev --extra fsdp pytest -s tests/backends/skyrl_train/gpu/gpu_ci/test_engine_generation.py -m "vllm"
9 changes: 9 additions & 0 deletions ci/gpu_ci_run_skyrl_train_megatron.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env bash
# GPU CI driver for the skyrl-train Megatron backend: prepares the GSM8K
# dataset, then runs only the tests marked "megatron".
#
# Fails fast on any error (-e), unset variable (-u), or pipeline failure
# (pipefail); -x traces every command for CI logs.
set -xeuo pipefail

export CI=true

# Prepare datasets used in tests. Quote "$HOME" so the path survives word
# splitting/globbing if HOME ever contains spaces or metacharacters.
uv run examples/train/gsm8k/gsm8k_dataset.py --output_dir "$HOME/data/gsm8k"

# Run all megatron tests
uv run --directory . --isolated --extra dev --extra megatron pytest -s tests/backends/skyrl_train/gpu/gpu_ci -m "megatron"

File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ Fully asynchronous (PipelineRL / AReal style) GRPO for Qwen2.5-1.5B-Instruct on

```bash
# prepare the dataset
uv run -- python examples/gsm8k/gsm8k_dataset.py --output_dir $HOME/data/gsm8k
uv run -- python examples/train/gsm8k/gsm8k_dataset.py --output_dir $HOME/data/gsm8k

export WANDB_API_KEY=<your_key_here>

bash examples/fully_async/fully_async_run_gsm8k.sh
bash examples/train/fully_async/fully_async_run_gsm8k.sh
```

For more details, refer to the documentation: https://docs.skyrl.ai/docs/tutorials/fully_async
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,18 @@ set -x

# Fully async GRPO training+generation for Qwen2.5-1.5B-Instruct on GSM8K.
# This bash script is copied from examples/async/async_run_gsm8k.sh, except for:
# - running examples.fully_async.main_fully_async
# - running examples.train.fully_async.main_fully_async
# - setting the generator.batched=false.
# - colocate_all=false
# - the various generator configs at the end (http, chat template, etc.)

# uv run examples/gsm8k/gsm8k_dataset.py --output_dir $HOME/data/gsm8k
# uv run examples/train/gsm8k/gsm8k_dataset.py --output_dir $HOME/data/gsm8k
# export WANDB_API_KEY=<your_key_here>
# bash examples/fully_async/fully_async_run_gsm8k.sh
# bash examples/train/fully_async/fully_async_run_gsm8k.sh

# NOTE (sumanthrh): `micro_train_batch_size_per_gpu` and `micro_forward_batch_size_per_gpu` can be tuned

# You can override the default values with e.g.: `NUM_GPUS=1 bash examples/fully_async/fully_async_run_gsm8k.sh`.
# You can override the default values with e.g.: `NUM_GPUS=1 bash examples/train/fully_async/fully_async_run_gsm8k.sh`.

: "${DATA_DIR:="$HOME/data/gsm8k"}"
: "${NUM_INFERENCE_GPUS:=2}"
Expand All @@ -31,9 +31,9 @@ set -x
TIS_TYPE=token
TIS_IMP_RATIO_CAP=2.0

RUN_NAME=gsm8k-fully-async-qwen2.5_1.5B-useTIS_${USE_TIS}-maxStale${MAX_STALENESS_STEPS}-numCon${NUM_PARALLEL_GENERATION_WORKERS}-${NUM_POLICY_GPUS}train${NUM_INFERENCE_GPUS}gen
RUN_NAME=gsm8k-fully-async-qwen2.5_1.5B-useTIS_${TIS_TYPE}-maxStale${MAX_STALENESS_STEPS}-numCon${NUM_PARALLEL_GENERATION_WORKERS}-${NUM_POLICY_GPUS}train${NUM_INFERENCE_GPUS}gen

uv run --isolated --extra $INFERENCE_BACKEND -m examples.fully_async.main_fully_async \
uv run --isolated --extra fsdp -m examples.train.fully_async.main_fully_async \
data.train_data="['$DATA_DIR/train.parquet']" \
data.val_data="['$DATA_DIR/validation.parquet']" \
trainer.fully_async.max_staleness_steps=${MAX_STALENESS_STEPS} \
Expand Down Expand Up @@ -76,6 +76,5 @@ uv run --isolated --extra $INFERENCE_BACKEND -m examples.fully_async.main_fully_
trainer.run_name=${RUN_NAME} \
trainer.resume_mode=latest \
trainer.ckpt_path="$HOME/ckpts/${RUN_NAME}" \
trainer.resume_mode=latest \
generator.enforce_eager=true \
$@
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

import hydra
from omegaconf import DictConfig
from skyrl_train.entrypoints.main_base import BasePPOExp, config_dir, validate_cfg
from skyrl_train.fully_async_trainer import FullyAsyncRayPPOTrainer
from skyrl.train.entrypoints.main_base import BasePPOExp, config_dir, validate_cfg
from skyrl.train.fully_async_trainer import FullyAsyncRayPPOTrainer
import asyncio
from skyrl_train.utils import initialize_ray
from skyrl.train.utils import initialize_ray
import ray


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ set -x

# Colocated GRPO training+generation for Qwen2.5-1.5B-Instruct on GSM8K.

# uv run examples/gsm8k/gsm8k_dataset.py --output_dir $HOME/data/gsm8k
# uv run examples/train/gsm8k/gsm8k_dataset.py --output_dir $HOME/data/gsm8k
# export WANDB_API_KEY=<your_key_here>
# bash examples/gsm8k/run_gsm8k.sh
# bash examples/train/gsm8k/run_gsm8k.sh

# NOTE (sumanthrh): `micro_train_batch_size_per_gpu` and `micro_forward_batch_size_per_gpu` can be tuned

# You can override the default values with e.g.: `NUM_GPUS=1 bash examples/gsm8k/run_gsm8k.sh`.
# You can override the default values with e.g.: `NUM_GPUS=1 bash examples/train/gsm8k/run_gsm8k.sh`.

: "${DATA_DIR:="$HOME/data/gsm8k"}"
: "${NUM_GPUS:=4}"
Expand All @@ -17,7 +17,7 @@ set -x
: "${INFERENCE_BACKEND:=vllm}"
# : "${INFERENCE_BACKEND:=sglang}"

uv run --isolated --extra $INFERENCE_BACKEND -m skyrl_train.entrypoints.main_base \
uv run --isolated --extra fsdp -m skyrl.train.entrypoints.main_base \
data.train_data="['$DATA_DIR/train.parquet']" \
data.val_data="['$DATA_DIR/validation.parquet']" \
trainer.algorithm.advantage_estimator="grpo" \
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ set -x

# Colocated GRPO training+generation for Qwen3-0.6B on GSM8K with Megatron.

# uv run examples/gsm8k/gsm8k_dataset.py --output_dir $HOME/data/gsm8k
# uv run examples/train/gsm8k/gsm8k_dataset.py --output_dir $HOME/data/gsm8k
# export WANDB_API_KEY=<your_key_here>
# bash examples/megatron/run_megatron.sh
# bash examples/train/megatron/run_megatron.sh

DATA_DIR="$HOME/data/gsm8k"
NUM_GPUS=4
Expand All @@ -22,7 +22,7 @@ ENABLE_TORCH_PROFILER=false
RANKS_TO_PROFILE="[0]"
SAVE_PATH="$HOME/megatron_prof/tp${MEGATRON_TP}_pp${MEGATRON_PP}_cp${MEGATRON_CP}_${MODEL_NAME}"

uv run --isolated --extra mcore -m skyrl_train.entrypoints.main_base \
uv run --isolated --extra megatron -m skyrl.train.entrypoints.main_base \
data.train_data="['$DATA_DIR/train.parquet']" \
data.val_data="['$DATA_DIR/validation.parquet']" \
trainer.algorithm.advantage_estimator="grpo" \
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Additional Reference: [Verl+Sglang Instructions](https://github.com/zhaochenyang
## Prepare Datasets
```bash
local_dir=~/data/searchR1
uv run --isolated examples/search/searchr1_dataset.py --local_dir $local_dir
uv run --isolated examples/train/search/searchr1_dataset.py --local_dir $local_dir
```

# Start the Search Engine
Expand Down Expand Up @@ -38,7 +38,7 @@ pip install uvicorn fastapi
conda activate retriever

local_dir=~/data/searchR1
python examples/search/searchr1_download.py --local_dir $local_dir
python examples/train/search/searchr1_download.py --local_dir $local_dir
cat $local_dir/part_* > $local_dir/e5_Flat.index
gzip -d $local_dir/wiki-18.jsonl.gz
```
Expand All @@ -49,13 +49,13 @@ conda activate retriever

# redirect the output to a file to avoid cluttering the terminal
# we have observed outputting to the terminal causing spikes in server response times
bash examples/search/retriever/retrieval_launch.sh > retrieval_server.log
bash examples/train/search/retriever/retrieval_launch.sh > retrieval_server.log
```

## Launch your Training Job
Now from your base environment, you can launch your training run (which will use uv to package dependencies, separately from the retriever environment).

```bash
export WANDB_API_KEY=your_wandb_api_key
bash examples/search/run_search.sh
bash examples/train/search/run_search.sh
```
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ corpus_file=$save_path/wiki-18.jsonl
retriever_name=e5
retriever_path=intfloat/e5-base-v2

python examples/search/retriever/retrieval_server.py \
python examples/train/search/retriever/retrieval_server.py \
--index_path $index_file \
--corpus_path $corpus_file \
--topk 3 \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set -x
# follow the instructions in examples/search/README.md for setting up the dataset
# and for starting the local search server
# export WANDB_API_KEY=<your_key_here>
# bash examples/search/run_search.sh
# bash examples/train/search/run_search.sh

# path for dataset (.parquet files) containing the prompts and metadata for each question
DATA_DIR="$HOME/data/searchR1"
Expand All @@ -14,7 +14,7 @@ RUN_NAME="skyrl-search_4turns_maxgeneratelen_500-multiturn-sync-TIS_2.0"
TIS_TYPE=token
TIS_IMP_RATIO_CAP=2.0

uv run --isolated --frozen --extra vllm -m skyrl_train.entrypoints.main_base \
uv run --isolated --frozen --extra fsdp -m skyrl.train.entrypoints.main_base \
data.train_data="['${DATA_DIR}/train.parquet']" \
data.val_data="['${DATA_DIR}/validation.parquet']" \
trainer.algorithm.advantage_estimator="grpo" \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ set -x
# the search result for each turn).

# Colocated GRPO training+generation for Qwen2.5-Coder-3B-Instruct on SearchR1 data.
# follow the instructions in examples/search/README.md for setting up the dataset
# follow the instructions in examples/train/search/README.md for setting up the dataset
# and for starting the local search server
# export WANDB_API_KEY=<your_key_here>
# bash examples/search/run_search_conversation_format.sh
# bash examples/train/search/run_search_conversation_format.sh

# path for dataset (.parquet files) containing the prompts and metadata for each question
DATA_DIR="$HOME/data/searchR1"
Expand All @@ -21,7 +21,7 @@ RUN_NAME="skyrl-search_4turns_maxgeneratelen_500"
TIS_TYPE=token
TIS_IMP_RATIO_CAP=2.0

uv run --isolated --frozen --extra vllm -m skyrl_train.entrypoints.main_base \
uv run --isolated --frozen --extra fsdp -m skyrl.train.entrypoints.main_base \
data.train_data="['${DATA_DIR}/train.parquet']" \
data.val_data="['${DATA_DIR}/validation.parquet']" \
trainer.algorithm.advantage_estimator="grpo" \
Expand Down
File renamed without changes.
Loading
Loading