Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
93 changes: 92 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,95 @@ tensorboard_log/
*.db

# uv lock files
uv.lock
uv.lock

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
uv.lock

# PyInstaller
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Jupyter Notebook
.ipynb_checkpoints

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# MkDocs build output
site/

# IDEs and editors
.idea/
.vscode/

# OS generated files
.DS_Store
Thumbs.db

# Hydra outputs
outputs/

# Local artifacts
tinker.db
uv.lock

# Alembic - don't track pycache
tx/tinker/alembic/__pycache__/

# SQLite databases (tracked in git by default, but ignore if created locally)
*.db
*.db-journal
*.db-wal
*.db-shm
Comment on lines +51 to +140
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This block adds many standard Python .gitignore entries, which is great. However, there are a few duplicated entries that should be removed to keep the file clean:

  • uv.lock is defined on line 49, but also on lines 79 and 131.
  • *.db is defined on line 46, but also on line 137.

Please remove the redundant entries on lines 79, 131, and 137.

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
248 changes: 241 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,247 @@
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[tool.setuptools.packages.find]
include = ["skyrl*"]

[project]
name = "skyrl"
version = "0.1.0"
description = "A Post-Training Stack for LLMs"
authors = [
{ name = "NovaSkyAI Team"}
]
license = { text = "MIT" }
dynamic = ["version"]
description = "Unified API for training and inference"
readme = "README.md"
requires-python = "==3.12.*"
requires-python = ">=3.11"
dependencies = [
"datasets>=4.0.0",
"pillow>=11.3.0",
"rich>=14.1.0",
"safetensors>=0.6.2",
"tokenizers>=0.21.2",
"transformers>=4.56.1,<5",
"typer>=0.17.4",
# "wandb>=0.22.0",
"peft",
"hf_transfer",
"cloudpathlib>=0.23.0",
]

[project.optional-dependencies]
gpu = [
"jax[cuda12]>=0.7.2; sys_platform == 'linux'",
]

tpu = [
"jax[tpu]>=0.7.2; sys_platform == 'linux'",
]

tinker = [
"tinker>=0.3.0",
"fastapi[standard]",
"sqlmodel",
"sqlalchemy[asyncio]",
"aiosqlite",
"asyncpg",
"psycopg2-binary",
]

aws = [
"cloudpathlib[s3]",
]

gcp = [
"cloudpathlib[gs]",
]

azure = [
"cloudpathlib[azure]",
]

# The extras "jax", "fsdp", and "megatron" are the dependencies the
# engine needs for --backend="jax", --backend="fsdp", and --backend="megatron",
# respectively.

jax = [
"jax>=0.8,<1.0",
"jax[cuda12]>=0.7.2; sys_platform == 'linux'",
"flax>=0.12.2",
"optax>=0.2.5",
]

skyrl-train = [
"loguru",
"tqdm",
"ninja",
"tensorboard",
"func_timeout",
"transformers>=4.51.0",
"hydra-core==1.3.2",
"accelerate",
"torchdata",
"omegaconf",
"ray==2.51.1",
"peft",
"debugpy==1.8.0",
"hf_transfer",
"wandb",
"datasets>=4.0.0",
"tensordict",
"jaxtyping",
"skyrl-gym",
"flash-attn; sys_platform == 'linux'",
"polars",
"s3fs",
"fastapi",
"uvicorn",
"pybind11",
"setuptools",
]

fsdp = [
"skyrl[skyrl-train]",
"vllm==0.13.0; sys_platform == 'linux'",
"flash-attn==2.8.3; sys_platform == 'linux'",
"torch==2.9.0; sys_platform == 'linux'",
"flashinfer-python; sys_platform == 'linux' and platform_machine == 'x86_64'",
"flashinfer-jit-cache==0.5.3; sys_platform == 'linux' and platform_machine == 'x86_64'",
"torchvision; sys_platform == 'linux'",
]

# megatron is pinned to python 3.12 due to ml-dtypes needing to be >= 0.5.0
# potentially loosen this restriction if this error can be root caused.
megatron = [
"skyrl[skyrl-train]; python_version == '3.12'",
"transformer-engine[pytorch]==2.10.0; sys_platform == 'linux' and python_version == '3.12'",
"flash-attn==2.8.1; sys_platform == 'linux' and python_version == '3.12'",
"vllm==0.13.0; sys_platform == 'linux' and python_version == '3.12'",
"torch==2.9.0; sys_platform == 'linux' and python_version == '3.12'",
"flashinfer-python==0.5.3; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version == '3.12'",
"torchvision; sys_platform == 'linux' and python_version == '3.12'",
"megatron-bridge; sys_platform == 'linux' and python_version == '3.12'",
"megatron-core==0.15.0; sys_platform == 'linux' and python_version == '3.12'",
"flashinfer-jit-cache==0.5.3; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version == '3.12'",
"nvidia-modelopt; sys_platform == 'linux' and python_version == '3.12'",
]
Comment on lines +111 to +123
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Pinning the entire megatron extra to python_version == '3.12' is quite restrictive. It prevents users on other Python versions (like 3.11, which is supported by the project) from installing and using this functionality. The comment mentions this is due to ml-dtypes. Have you considered addressing the dependency conflict more granularly, for example by using an override for ml-dtypes in [tool.uv.override-dependencies]? This would make the project more flexible for users on different Python versions.


flashrl = [
"skyrl[skyrl-train]",
# NOTE: Custom vLLM wheel must be installed separately.
# See examples/flash_rl/README.md for installation instructions.
"flash-attn==2.8.3; sys_platform == 'linux'",
"torch==2.7.0; sys_platform == 'linux'",
"flashinfer-python; sys_platform == 'linux'",
"torchvision; sys_platform == 'linux'",
]
miniswe = [
"skyrl[skyrl-train]",
# NOTE (sumanthrh): Needs to be a commit after https://github.com/SWE-agent/mini-swe-agent/commit/4f5d445e99d13b5482478c23508bf2fbf7c0670c
"mini-swe-agent>=1.12.0",
"litellm",
]

dev = [
"mkdocs",
"mkdocs-material",
"pytest",
"pytest-forked",
"pytest-asyncio",
"pre-commit",
"litellm",
"torch",
"ty",
"cloudpathlib[s3]",
"alembic",
]

[tool.setuptools]
include-package-data = true

[tool.setuptools.dynamic]
version = {attr = "skyrl.__version__"}

[project.scripts]
# The following is for supporting the skyrl-train dependency
Comment on lines +161 to +162
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The [project.scripts] section is empty, and the accompanying comment "# The following is for supporting the skyrl-train dependency" is a bit unclear. If there are no scripts to be installed, it would be cleaner to remove this section entirely. If it's a placeholder for future use, a more descriptive comment explaining its purpose would be helpful for future maintainers.


[tool.uv]
# Resolve for both Linux (production) and macOS (dev)
required-environments = [
"sys_platform == 'linux'",
"sys_platform == 'darwin' and platform_machine == 'arm64'",
]

constraint-dependencies = [
"flashinfer-jit-cache==0.5.3",
]
# each backend should have separate dependencies that can potentially clash
# megatron also clashes with the jax dependency from gpu and tpu extras
conflicts = [
[
{ extra = "jax" },
{ extra = "megatron" },
{ extra = "fsdp" },
{ extra = "flashrl" },
],
[
{ extra = "megatron" },
{ extra = "gpu" },
{ extra = "tpu" },
{ extra = "flashrl" },
{ extra = "miniswe" },
]
]
# disable build isolation for megatron related dependencies
no-build-isolation-package = [
"transformer-engine-torch",
"transformer-engine",
"nv-grouped-gemm",
]
# Exclude unnecessary dependencies, and pin versions to constrain
# Megatron-Bridge's unpinned dependencies
override-dependencies = [
"nvidia-resiliency-ext; sys_platform == 'never'",
"mamba-ssm; sys_platform == 'never'",
"causal-conv1d; sys_platform == 'never'",
"transformer-engine[pytorch]==2.10.0; sys_platform == 'linux'",
"megatron-core==0.15.0; sys_platform == 'linux'",
]

[tool.uv.extra-build-dependencies]
flash-attn = [{requirement = "torch", match-runtime = true}]
transformer-engine = [{requirement = "torch", match-runtime = true}, "build_tools", "ninja"]
transformer-engine-torch = [{requirement = "torch", match-runtime = true}, "build_tools", "ninja"]

[tool.uv.extra-build-variables]
flash-attn = { FLASH_ATTENTION_SKIP_CUDA_BUILD = "TRUE"}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Setting FLASH_ATTENTION_SKIP_CUDA_BUILD = "TRUE" is concerning. This will likely prevent flash-attn's CUDA extensions from being built, causing it to fall back to a much slower pure Python implementation and negating its performance benefits. If the goal is to use pre-built wheels, that should be configured through [tool.uv.sources] or by pinning to a specific wheel URL. Could you clarify the reasoning for this build variable? Without the CUDA extensions, there could be a significant performance regression.


[[tool.uv.index]]
name = "pytorch-cu128"
url = "https://download.pytorch.org/whl/cu128"
explicit = true

[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
explicit = true

[[tool.uv.index]]
name = "flashinfer-cu128"
url = "https://flashinfer.ai/whl/cu128"
explicit = true

[tool.uv.sources]
skyrl-gym = { path = "./skyrl-gym", editable = true }
# flashinfer wheels are only available from the custom cu128 index
flashinfer-jit-cache = { index = "flashinfer-cu128", marker = "sys_platform == 'linux'" }
# Use CUDA torch on Linux, CPU torch on macOS (must match skyrl-train config)
torch = [
{ index = "pytorch-cu128", marker = "sys_platform == 'linux'" },
{ index = "pytorch-cpu", marker = "sys_platform == 'darwin'" },
]
torchvision = [
{ index = "pytorch-cu128", marker = "sys_platform == 'linux'" },
{ index = "pytorch-cpu", marker = "sys_platform == 'darwin'" },
]
# pin megatron-bridge to a commit containing the fix for MoE + LoRA merging. Update this when an official release is cut
megatron-bridge = {git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge", rev = "04e370eedf8cc44a812189a19f2171d90555c07a", marker = "sys_platform == 'linux'"}

[tool.black]
line-length = 120
Expand Down
3 changes: 0 additions & 3 deletions skyrl-train/.env.example

This file was deleted.

3 changes: 0 additions & 3 deletions skyrl-train/.env.llm_judge

This file was deleted.

10 changes: 0 additions & 10 deletions skyrl-train/.gitignore

This file was deleted.

Loading
Loading