Commit 77fa0bc

Merge branch 'volcengine:main' into main

2 parents: 5415881 + 0615352

File tree: 85 files changed (+4244, -410 lines)


.github/workflows/e2e_genrm_remote.yml

Lines changed: 3 additions & 0 deletions

```diff
@@ -66,6 +66,7 @@ on:
       - ".github/workflows/e2e_genrm_remote.yml"
       - "examples/data_preprocess/gsm8k.py"
       - "tests/special_e2e/run_genrm_remote.sh"
+      - "tests/special_e2e/generation/run_gen_qwen05_server.sh"

 # Cancel jobs on the same ref if a new one is triggered
 concurrency:
@@ -120,6 +121,8 @@ jobs:
       run: |
         ray stop --force
         bash tests/special_e2e/run_genrm_remote.sh
+        ray stop --force
+        bash tests/special_e2e/generation/run_gen_qwen05_server.sh

   cleanup:
     runs-on: ubuntu-latest
```
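The second hunk follows a reset-then-run pattern: force-stop any Ray cluster left by the previous script before launching the next one, so state cannot leak between e2e runs. A minimal standalone sketch of that pattern (the `run_e2e` helper is hypothetical, and the `ray stop` call is guarded so the sketch runs on hosts without Ray installed):

```shell
# Sketch of the cleanup-then-run pattern added by the workflow hunk above.
# run_e2e is a hypothetical helper, not part of the verl repository.
run_e2e() {
    script="$1"
    if command -v ray >/dev/null 2>&1; then
        ray stop --force   # kill any Ray processes left by an earlier step
    fi
    # In the real workflow this would be: bash "$script"
    echo "would run: bash $script"
}

run_e2e tests/special_e2e/run_genrm_remote.sh
run_e2e tests/special_e2e/generation/run_gen_qwen05_server.sh
```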

.github/workflows/e2e_sft.yml

Lines changed: 1 addition & 1 deletion

```diff
@@ -91,7 +91,7 @@ jobs:
   e2e_sft:
     needs: setup
     runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
-    timeout-minutes: 25 # Increase this timeout value as needed
+    timeout-minutes: 30 # Increase this timeout value as needed
     env:
       HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
       HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
```

.github/workflows/model.yml

Lines changed: 1 addition & 0 deletions

```diff
@@ -208,6 +208,7 @@ jobs:

       - name: Running mcore engine tests on 8 L20 GPUs
         run: |
+          ray stop --force
           pytest -s -x tests/models/test_engine.py

   cleanup:
```

README.md

Lines changed: 3 additions & 0 deletions

```diff
@@ -238,6 +238,9 @@ verl is inspired by the design of Nemo-Aligner, Deepspeed-chat and OpenRLHF. The
 - [Vision-SR1](https://github.com/zli12321/Vision-SR1): Self-Rewarding Vision-Language Model via Reasoning Decomposition ![GitHub Repo stars](https://img.shields.io/github/stars/zli12321/Vision-SR1)
 - [SimpleVLA-RL](https://github.com/PRIME-RL/SimpleVLA-RL): SimpleVLA-RL: A Simple yet Effective Vision-Language Action Model for Reinforcement Learning ![GitHub Repo stars](https://img.shields.io/github/stars/PRIME-RL/SimpleVLA-RL)
 - [Table-R1](https://github.com/Table-R1/Table-R1): Table-R1: Inference-Time Scaling for Table Reasoning ![GitHub Repo stars](https://img.shields.io/github/stars/Table-R1/Table-R1)
+- [Revisual-R1](https://github.com/CSfufu/Revisual-R1): Revisual-R1: Advancing Multimodal Reasoning From Optimized Cold Start to Staged Reinforcement Learning ![GitHub Repo stars](https://img.shields.io/github/stars/CSfufu/Revisual-R1)
+- [ARES](https://github.com/shawn0728/ARES): ARES: Multimodal Adaptive Reasoning via Difficulty-Aware Token-Level Entropy Shaping ![GitHub Repo stars](https://img.shields.io/github/stars/shawn0728/ARES)
+- [Meta-Bandit-LLM](https://github.com/sanxing-chen/meta-bandit-llm): Meta-Bandit-LLM: Long-horizon multiturn interactive training for meta-bandit agents ![GitHub Repo stars](https://img.shields.io/github/stars/sanxing-chen/meta-bandit-llm)

 and many more awesome work listed in [recipe](recipe/README.md).
```

docker/README.md

Lines changed: 2 additions & 0 deletions

```diff
@@ -36,6 +36,8 @@ For vLLM with FSDP, please refer to [hiyouga/verl](https://hub.docker.com/r/hiyo

 For SGLang with FSDP, please refer to [ocss884/verl-sglang](https://hub.docker.com/r/ocss884/verl-sglang) repository and the latest version is ``ocss884/verl-sglang:ngc-th2.6.0-cu126-sglang0.4.6.post5`` which is provided by SGLang RL Group.

+For latest vLLM with Megatron, please refer to [iseekyan/verl](https://hub.docker.com/r/iseekyan/verl) repository and the latest version is ``iseekyan/verl:nemo.gptoss_vllm0.11.0``.
+
 See files under ``docker/`` for NGC-based image or if you want to build your own.

 Note that For aws instances with EFA net interface (Sagemaker AI Pod), you need to install EFA driver as shown in ``docker/Dockerfile.extenstion.awsefa``
```
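As a usage sketch for the image tag added above: the `--gpus` and `--shm-size` flags below are typical choices for verl GPU containers, not taken from this commit, and the commands are guarded so the sketch degrades gracefully on hosts without Docker.

```shell
# Pull and launch the Megatron + vLLM image named in the README diff above.
# Flags are illustrative defaults; adjust for your machine.
IMAGE="iseekyan/verl:nemo.gptoss_vllm0.11.0"
if command -v docker >/dev/null 2>&1; then
    docker pull "$IMAGE"
    docker run --gpus all --shm-size=16g -it --rm "$IMAGE" bash
else
    echo "docker not available; image would be $IMAGE"
fi
```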
Lines changed: 15 additions & 0 deletions

```diff
@@ -0,0 +1,15 @@
+FROM nvcr.io/nvidia/nemo:25.07.gpt_oss
+
+RUN git clone -b v0.11.0 --depth 1 https://github.com/vllm-project/vllm.git /opt/vllm
+
+RUN pip install setuptools_scm
+
+RUN cd /opt/vllm && pip install --no-deps --no-build-isolation --no-cache-dir -e .
+
+RUN pip install cbor2 setproctitle blake3 openai_harmony pybase64 msgspec partial_json_parser py-cpuinfo diskcache gguf
+
+RUN pip install --upgrade transformers tokenizers
+
+RUN pip install codetiming tensordict mathruler pylatexenc
+
+RUN pip3 install --no-cache-dir mbridge
```
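The filename of this new Dockerfile is not shown in this view, so the path in the sketch below is a placeholder; a local build of the image it defines could look like:

```shell
# Build the image defined above. "docker/Dockerfile.vllm_megatron" is a
# PLACEHOLDER path -- the real filename is not visible in this diff view.
DOCKERFILE="docker/Dockerfile.vllm_megatron"
TAG="verl-local:vllm0.11.0-nemo"
if command -v docker >/dev/null 2>&1 && [ -f "$DOCKERFILE" ]; then
    docker build -f "$DOCKERFILE" -t "$TAG" .
else
    echo "skipping build (docker or $DOCKERFILE missing); tag would be $TAG"
fi
```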
