Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
b405450
chore: upgrade vllm dependency from 0.13.0 to 0.17.1
junstar92 Mar 18, 2026
fde3ba9
fix: bump RBLNScheduler to v0.17.1
huijjj Mar 18, 2026
3c6245d
fix: introduce missing _get_slot_mappings helper and propagate slot_m…
junstar92 Mar 18, 2026
676d4c1
fix: return compilation time after warmup
huijjj Mar 19, 2026
0bd5d7f
fix: scheduler tests
huijjj Mar 23, 2026
8b32c7f
chore: upgrade vllm dependency from 0.17.1 to 0.18.0
junstar92 Mar 24, 2026
12abb89
fix: bump scheduler codes and tests to v0.18.0
huijjj Mar 24, 2026
6c374cd
fix: bug related to sampler warm-up for pipeline parallel (#485)
rebel-ykchoi Mar 26, 2026
1822471
change pyproject.toml (#484)
rebel-eunji Mar 26, 2026
719127f
docs: update readme (#486)
rebel-jiwoopark Mar 27, 2026
d02ff99
fix: update ci branch (#483)
rebel-seinpark Mar 27, 2026
18a762a
fix(scheduler): use prefill_token_budget for new request chunk size (…
rebel-jaehwang Mar 31, 2026
95d4353
other: ignore minor error in prefix cache example script (#500)
rebel-jaehwang Mar 31, 2026
563c291
refactor(test): Separate unit test and e2e test for torch compile pat…
rebel-jinhwan Apr 1, 2026
d674306
other(test): add structured decoding unit/e2e tests (#498)
rebel-jinhwan Apr 1, 2026
980504b
fix(kernel): fixed argument lists for swa in 0.18 (#502)
rebel-jindol21 Apr 2, 2026
e581f96
other(log): fix warning message related to env variables (#505)
rebel-jiwoopark Apr 2, 2026
ab677e4
feature: sub-block prefix caching (#442)
rebel-jaehwang Apr 3, 2026
d2c77b1
fix: restore PP-related code dropped in v0.13 bump (#507)
rebel-ykchoi Apr 3, 2026
124b33a
test: update LoRA tests for vLLM 0.18.0 and relocate some tests to th…
junstar92 Apr 2, 2026
c2127b5
fix: update VocabParallelEmbeddingWithLoRA forward for vLLM v0.18.0
junstar92 Apr 2, 2026
42e9d5b
test: add basic SQL LoRA e2e test and tune compile limits
junstar92 Apr 2, 2026
312c9c2
test: mark embedding LoRA path as expected failure due to numerical i…
junstar92 Apr 2, 2026
0c2163f
chore: remove unused environment variable for LoRA test script
junstar92 Apr 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 30 additions & 6 deletions .github/workflows/rbln_optimum_ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,13 @@ jobs:
run: |
git config --global --add safe.directory "$GITHUB_WORKSPACE"

- name: Parse vllm-cpu index URL from pyproject.toml
id: parse_index_url
run: |
VLLM_CPU_URL=$(python3 -c "import tomllib; f=open('pyproject.toml','rb'); config=tomllib.load(f); print(next(i['url'] for i in config.get('tool',{}).get('uv',{}).get('index',[]) if i.get('name')=='vllm-cpu'))")
echo "Parsed vllm-cpu URL: ${VLLM_CPU_URL}"
echo "VLLM_CPU_URL=${VLLM_CPU_URL}" >> $GITHUB_OUTPUT

- name: Uninstall existing vllm-rbln
run: |
uv pip uninstall --system vllm-rbln
Expand All @@ -107,13 +114,14 @@ jobs:
retry_on: any
shell: bash
command: |
VLLM_CPU_URL="${{ steps.parse_index_url.outputs.VLLM_CPU_URL }}"
uv pip install --system packaging setuptools wheel simphile pynvml huggingface_hub setuptools_scm fire pytest pytest-asyncio
VERSION=${{ inputs.optimum_rbln_version }}
if [ -n "${{ inputs.optimum_rbln_version }}" ]; then
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
echo "optimum-rbln ${{ inputs.optimum_rbln_version }}"
else
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
fi

- name : Run Qwen3-embedding (V1)
Expand Down Expand Up @@ -184,6 +192,13 @@ jobs:
run: |
git config --global --add safe.directory "$GITHUB_WORKSPACE"

- name: Parse vllm-cpu index URL from pyproject.toml
id: parse_index_url
run: |
VLLM_CPU_URL=$(python3 -c "import tomllib; f=open('pyproject.toml','rb'); config=tomllib.load(f); print(next(i['url'] for i in config.get('tool',{}).get('uv',{}).get('index',[]) if i.get('name')=='vllm-cpu'))")
echo "Parsed vllm-cpu URL: ${VLLM_CPU_URL}"
echo "VLLM_CPU_URL=${VLLM_CPU_URL}" >> $GITHUB_OUTPUT

- name: Uninstall existing vllm-rbln
run: |
uv pip uninstall --system vllm-rbln
Expand All @@ -202,13 +217,14 @@ jobs:
retry_on: any
shell: bash
command: |
VLLM_CPU_URL="${{ steps.parse_index_url.outputs.VLLM_CPU_URL }}"
uv pip install --system packaging setuptools wheel simphile pynvml huggingface_hub setuptools_scm fire pytest pytest-asyncio
VERSION=${{ inputs.optimum_rbln_version }}
if [ -n "${{ inputs.optimum_rbln_version }}" ]; then
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
echo "optimum-rbln ${{ inputs.optimum_rbln_version }}"
else
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
fi

- name: Run decoder-only test (eager attn) (V1)
Expand Down Expand Up @@ -304,6 +320,13 @@ jobs:
run: |
git config --global --add safe.directory "$GITHUB_WORKSPACE"

- name: Parse vllm-cpu index URL from pyproject.toml
id: parse_index_url
run: |
VLLM_CPU_URL=$(python3 -c "import tomllib; f=open('pyproject.toml','rb'); config=tomllib.load(f); print(next(i['url'] for i in config.get('tool',{}).get('uv',{}).get('index',[]) if i.get('name')=='vllm-cpu'))")
echo "Parsed vllm-cpu URL: ${VLLM_CPU_URL}"
echo "VLLM_CPU_URL=${VLLM_CPU_URL}" >> $GITHUB_OUTPUT

- name: Uninstall existing vllm-rbln
run: |
uv pip uninstall --system vllm-rbln
Expand All @@ -322,13 +345,14 @@ jobs:
retry_on: any
shell: bash
command: |
VLLM_CPU_URL="${{ steps.parse_index_url.outputs.VLLM_CPU_URL }}"
uv pip install --system packaging setuptools wheel simphile pynvml huggingface_hub setuptools_scm fire pytest pytest-asyncio
VERSION=${{ inputs.optimum_rbln_version }}
if [ -n "${{ inputs.optimum_rbln_version }}" ]; then
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
echo "optimum-rbln ${{ inputs.optimum_rbln_version }}"
else
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
fi

- name : Run Qwen2.5_VL (V1)
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/rbln_trigger_fsw_pr_ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
with: |
token: ${{ secrets.GIT_PAT }}
repository: rebellions-sw/fsw-integration
event-type: ${{ github.event.pull_request.base.ref == 'dev-0.12' && 'vllm-rbln-pr-event-for-0.12' || 'vllm-rbln-pr-event' }}
event-type: ${{ github.event.pull_request.base.ref == 'dev-0.18' && 'vllm-rbln-pr-event-for-0.18' || 'vllm-rbln-pr-event' }}
client-payload: |
{
"ref": "${{ github.event.pull_request.head.ref }}",
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/rbln_trigger_on_pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ on:
pull_request:
branches:
- dev
- dev-0.18

jobs:
check-skip-ci:
Expand Down
8 changes: 5 additions & 3 deletions .github/workflows/rbln_utils_get_compiler_version_ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@ jobs:
env:
GH_TOKEN: ${{ secrets.GIT_PAT }}
run: |
RUN_ID=$(curl -s -H "Authorization: Bearer $GH_TOKEN" \
RESPONSE=$(curl -s -H "Authorization: Bearer $GH_TOKEN" \
-H "Accept: application/vnd.github+json" \
"https://api.github.com/repos/rbln-sw/vllm-rbln/actions/workflows/rbln_utils_set_compiler_version_ci.yaml/runs?status=success&per_page=1" \
| jq -r '.workflow_runs[0].id')
"https://api.github.com/repos/rbln-sw/vllm-rbln/actions/workflows/rbln_utils_set_compiler_version_ci.yaml/runs?status=success&per_page=1")
echo "API Response: $RESPONSE"

RUN_ID=$(echo "$RESPONSE" | jq -r '.workflow_runs[0].id')
echo "Selected RUN_ID=${RUN_ID}"

if [ "$RUN_ID" = "null" ] || [ -z "$RUN_ID" ]; then
Expand Down
24 changes: 20 additions & 4 deletions .github/workflows/rbln_vllm-rbln_pytest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ jobs:
submodules: recursive
fetch-depth: 0

- name: Parse vllm-cpu index URL from pyproject.toml
id: parse_index_url
run: |
VLLM_CPU_URL=$(python3 -c "import tomllib; f=open('pyproject.toml','rb'); config=tomllib.load(f); print(next(i['url'] for i in config.get('tool',{}).get('uv',{}).get('index',[]) if i.get('name')=='vllm-cpu'))")
echo "Parsed vllm-cpu URL: ${VLLM_CPU_URL}"
echo "VLLM_CPU_URL=${VLLM_CPU_URL}" >> $GITHUB_OUTPUT

- name: Get commit message
id: get_commit_message
run: |
Expand Down Expand Up @@ -149,12 +156,13 @@ jobs:
- name: Install local vllm-rbln package and dependencies
if: steps.should_skip.outputs.skip != 'true'
run: |
VLLM_CPU_URL="${{ steps.parse_index_url.outputs.VLLM_CPU_URL }}"
pip install packaging setuptools wheel simphile pynvml huggingface_hub setuptools_scm fire pytest pytest-asyncio
if [ -n "${{ inputs.optimum_rbln_version }}" ]; then
pip install --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --trusted-host pypi-cache.devpi.svc.cluster.local
pip install --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --trusted-host pypi-cache.devpi.svc.cluster.local
echo "optimum-rbln ${{ inputs.optimum_rbln_version }}"
else
pip install --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --trusted-host pypi-cache.devpi.svc.cluster.local
pip install --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --trusted-host pypi-cache.devpi.svc.cluster.local
fi

- name: Run pytest
Expand Down Expand Up @@ -211,6 +219,13 @@ jobs:
run: |
git config --global --add safe.directory "$GITHUB_WORKSPACE"

- name: Parse vllm-cpu index URL from pyproject.toml
id: parse_index_url
run: |
VLLM_CPU_URL=$(python3 -c "import tomllib; f=open('pyproject.toml','rb'); config=tomllib.load(f); print(next(i['url'] for i in config.get('tool',{}).get('uv',{}).get('index',[]) if i.get('name')=='vllm-cpu'))")
echo "Parsed vllm-cpu URL: ${VLLM_CPU_URL}"
echo "VLLM_CPU_URL=${VLLM_CPU_URL}" >> $GITHUB_OUTPUT

- name: Get commit message
id: get_commit_message
run: |
Expand Down Expand Up @@ -284,12 +299,13 @@ jobs:
retry_on: any
shell: bash
command: |
VLLM_CPU_URL="${{ steps.parse_index_url.outputs.VLLM_CPU_URL }}"
uv pip install --system packaging setuptools wheel simphile pynvml huggingface_hub setuptools_scm fire pytest pytest-asyncio pytest-xdist
if [ -n "${{ inputs.optimum_rbln_version }}" ]; then
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
echo "optimum-rbln ${{ inputs.optimum_rbln_version }}"
else
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
fi

- name: Run pytest
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ You can install this project using `pip` or from source.
#### Install via PyPI

```bash
pip install vllm-rbln --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu
pip install vllm-rbln --extra-index-url https://wheels.vllm.ai/0.18.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu
```

#### Or from source
Expand All @@ -46,7 +46,7 @@ uv pip install -e .
```bash
git clone https://github.com/rbln-sw/vllm-rbln.git
cd vllm-rbln
pip install -e . --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu
pip install -e . --extra-index-url https://wheels.vllm.ai/0.18.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu
```

### πŸ“š Documentation
Expand Down
Loading
Loading