Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
b405450
chore: upgrade vllm dependency from 0.13.0 to 0.17.1
junstar92 Mar 18, 2026
fde3ba9
fix: bump RBLNScheduler to v0.17.1
huijjj Mar 18, 2026
3c6245d
fix: introduce missing _get_slot_mappings helper and propagate slot_m…
junstar92 Mar 18, 2026
676d4c1
fix: return compilation time after warmup
huijjj Mar 19, 2026
0bd5d7f
fix: scheduler tests
huijjj Mar 23, 2026
8b32c7f
chore: upgrade vllm dependency from 0.17.1 to 0.18.0
junstar92 Mar 24, 2026
12abb89
fix: bump scheduler codes and tests to v0.18.0
huijjj Mar 24, 2026
6c374cd
fix: bug related to sampler warm-up for pipeline parallel (#485)
rebel-ykchoi Mar 26, 2026
1822471
change pyproject.toml (#484)
rebel-eunji Mar 26, 2026
719127f
docs: update readme (#486)
rebel-jiwoopark Mar 27, 2026
d02ff99
fix: update ci branch (#483)
rebel-seinpark Mar 27, 2026
18a762a
fix(scheduler): use prefill_token_budget for new request chunk size (…
rebel-jaehwang Mar 31, 2026
95d4353
other: ignore minor error in prefix cache example script (#500)
rebel-jaehwang Mar 31, 2026
563c291
refactor(test): Separate unit test and e2e test for torch compile pat…
rebel-jinhwan Apr 1, 2026
d674306
other(test): add structured decoding unit/e2e tests (#498)
rebel-jinhwan Apr 1, 2026
980504b
fix(kernel): fixed argument lists for swa in 0.18 (#502)
rebel-jindol21 Apr 2, 2026
e581f96
other(log): fix warning message related to env variables (#505)
rebel-jiwoopark Apr 2, 2026
ab677e4
feature: sub-block prefix caching (#442)
rebel-jaehwang Apr 3, 2026
d2c77b1
fix: restore PP-related code dropped in v0.13 bump (#507)
rebel-ykchoi Apr 3, 2026
124b33a
test: update LoRA tests for vLLM 0.18.0 and relocate some tests to th…
junstar92 Apr 2, 2026
c2127b5
fix: update VocabParallelEmbeddingWithLoRA forward for vLLM v0.18.0
junstar92 Apr 2, 2026
42e9d5b
test: add basic SQL LoRA e2e test and tune compile limits
junstar92 Apr 2, 2026
312c9c2
test: mark embedding LoRA path as expected failure due to numerical i…
junstar92 Apr 2, 2026
0c2163f
chore: remove unused environment variable for LoRA test script
junstar92 Apr 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 30 additions & 6 deletions .github/workflows/rbln_optimum_ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,13 @@ jobs:
run: |
git config --global --add safe.directory "$GITHUB_WORKSPACE"

- name: Parse vllm-cpu index URL from pyproject.toml
id: parse_index_url
run: |
VLLM_CPU_URL=$(python3 -c "import tomllib; f=open('pyproject.toml','rb'); config=tomllib.load(f); print(next(i['url'] for i in config.get('tool',{}).get('uv',{}).get('index',[]) if i.get('name')=='vllm-cpu'))")
echo "Parsed vllm-cpu URL: ${VLLM_CPU_URL}"
echo "VLLM_CPU_URL=${VLLM_CPU_URL}" >> $GITHUB_OUTPUT

- name: Uninstall existing vllm-rbln
run: |
uv pip uninstall --system vllm-rbln
Expand All @@ -107,13 +114,14 @@ jobs:
retry_on: any
shell: bash
command: |
VLLM_CPU_URL="${{ steps.parse_index_url.outputs.VLLM_CPU_URL }}"
uv pip install --system packaging setuptools wheel simphile pynvml huggingface_hub setuptools_scm fire pytest pytest-asyncio
VERSION=${{ inputs.optimum_rbln_version }}
if [ -n "${{ inputs.optimum_rbln_version }}" ]; then
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
echo "optimum-rbln ${{ inputs.optimum_rbln_version }}"
else
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
fi

- name : Run Qwen3-embedding (V1)
Expand Down Expand Up @@ -184,6 +192,13 @@ jobs:
run: |
git config --global --add safe.directory "$GITHUB_WORKSPACE"

- name: Parse vllm-cpu index URL from pyproject.toml
id: parse_index_url
run: |
VLLM_CPU_URL=$(python3 -c "import tomllib; f=open('pyproject.toml','rb'); config=tomllib.load(f); print(next(i['url'] for i in config.get('tool',{}).get('uv',{}).get('index',[]) if i.get('name')=='vllm-cpu'))")
echo "Parsed vllm-cpu URL: ${VLLM_CPU_URL}"
echo "VLLM_CPU_URL=${VLLM_CPU_URL}" >> $GITHUB_OUTPUT

- name: Uninstall existing vllm-rbln
run: |
uv pip uninstall --system vllm-rbln
Expand All @@ -202,13 +217,14 @@ jobs:
retry_on: any
shell: bash
command: |
VLLM_CPU_URL="${{ steps.parse_index_url.outputs.VLLM_CPU_URL }}"
uv pip install --system packaging setuptools wheel simphile pynvml huggingface_hub setuptools_scm fire pytest pytest-asyncio
VERSION=${{ inputs.optimum_rbln_version }}
if [ -n "${{ inputs.optimum_rbln_version }}" ]; then
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
echo "optimum-rbln ${{ inputs.optimum_rbln_version }}"
else
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
fi

- name: Run decoder-only test (eager attn) (V1)
Expand Down Expand Up @@ -304,6 +320,13 @@ jobs:
run: |
git config --global --add safe.directory "$GITHUB_WORKSPACE"

- name: Parse vllm-cpu index URL from pyproject.toml
id: parse_index_url
run: |
VLLM_CPU_URL=$(python3 -c "import tomllib; f=open('pyproject.toml','rb'); config=tomllib.load(f); print(next(i['url'] for i in config.get('tool',{}).get('uv',{}).get('index',[]) if i.get('name')=='vllm-cpu'))")
echo "Parsed vllm-cpu URL: ${VLLM_CPU_URL}"
echo "VLLM_CPU_URL=${VLLM_CPU_URL}" >> $GITHUB_OUTPUT

- name: Uninstall existing vllm-rbln
run: |
uv pip uninstall --system vllm-rbln
Expand All @@ -322,13 +345,14 @@ jobs:
retry_on: any
shell: bash
command: |
VLLM_CPU_URL="${{ steps.parse_index_url.outputs.VLLM_CPU_URL }}"
uv pip install --system packaging setuptools wheel simphile pynvml huggingface_hub setuptools_scm fire pytest pytest-asyncio
VERSION=${{ inputs.optimum_rbln_version }}
if [ -n "${{ inputs.optimum_rbln_version }}" ]; then
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
echo "optimum-rbln ${{ inputs.optimum_rbln_version }}"
else
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
fi

- name : Run Qwen2.5_VL (V1)
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/rbln_trigger_fsw_pr_ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
with: |
token: ${{ secrets.GIT_PAT }}
repository: rebellions-sw/fsw-integration
event-type: ${{ github.event.pull_request.base.ref == 'dev-0.12' && 'vllm-rbln-pr-event-for-0.12' || 'vllm-rbln-pr-event' }}
event-type: ${{ github.event.pull_request.base.ref == 'dev-0.18' && 'vllm-rbln-pr-event-for-0.18' || 'vllm-rbln-pr-event' }}
client-payload: |
{
"ref": "${{ github.event.pull_request.head.ref }}",
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/rbln_trigger_on_pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ on:
pull_request:
branches:
- dev
- dev-0.18

jobs:
check-skip-ci:
Expand Down
8 changes: 5 additions & 3 deletions .github/workflows/rbln_utils_get_compiler_version_ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@ jobs:
env:
GH_TOKEN: ${{ secrets.GIT_PAT }}
run: |
RUN_ID=$(curl -s -H "Authorization: Bearer $GH_TOKEN" \
RESPONSE=$(curl -s -H "Authorization: Bearer $GH_TOKEN" \
-H "Accept: application/vnd.github+json" \
"https://api.github.com/repos/rbln-sw/vllm-rbln/actions/workflows/rbln_utils_set_compiler_version_ci.yaml/runs?status=success&per_page=1" \
| jq -r '.workflow_runs[0].id')
"https://api.github.com/repos/rbln-sw/vllm-rbln/actions/workflows/rbln_utils_set_compiler_version_ci.yaml/runs?status=success&per_page=1")
echo "API Response: $RESPONSE"

RUN_ID=$(echo "$RESPONSE" | jq -r '.workflow_runs[0].id')
echo "Selected RUN_ID=${RUN_ID}"

if [ "$RUN_ID" = "null" ] || [ -z "$RUN_ID" ]; then
Expand Down
24 changes: 20 additions & 4 deletions .github/workflows/rbln_vllm-rbln_pytest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ jobs:
submodules: recursive
fetch-depth: 0

- name: Parse vllm-cpu index URL from pyproject.toml
id: parse_index_url
run: |
VLLM_CPU_URL=$(python3 -c "import tomllib; f=open('pyproject.toml','rb'); config=tomllib.load(f); print(next(i['url'] for i in config.get('tool',{}).get('uv',{}).get('index',[]) if i.get('name')=='vllm-cpu'))")
echo "Parsed vllm-cpu URL: ${VLLM_CPU_URL}"
echo "VLLM_CPU_URL=${VLLM_CPU_URL}" >> $GITHUB_OUTPUT

- name: Get commit message
id: get_commit_message
run: |
Expand Down Expand Up @@ -149,12 +156,13 @@ jobs:
- name: Install local vllm-rbln package and dependencies
if: steps.should_skip.outputs.skip != 'true'
run: |
VLLM_CPU_URL="${{ steps.parse_index_url.outputs.VLLM_CPU_URL }}"
pip install packaging setuptools wheel simphile pynvml huggingface_hub setuptools_scm fire pytest pytest-asyncio
if [ -n "${{ inputs.optimum_rbln_version }}" ]; then
pip install --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --trusted-host pypi-cache.devpi.svc.cluster.local
pip install --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --trusted-host pypi-cache.devpi.svc.cluster.local
echo "optimum-rbln ${{ inputs.optimum_rbln_version }}"
else
pip install --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --trusted-host pypi-cache.devpi.svc.cluster.local
pip install --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --trusted-host pypi-cache.devpi.svc.cluster.local
fi

- name: Run pytest
Expand Down Expand Up @@ -211,6 +219,13 @@ jobs:
run: |
git config --global --add safe.directory "$GITHUB_WORKSPACE"

- name: Parse vllm-cpu index URL from pyproject.toml
id: parse_index_url
run: |
VLLM_CPU_URL=$(python3 -c "import tomllib; f=open('pyproject.toml','rb'); config=tomllib.load(f); print(next(i['url'] for i in config.get('tool',{}).get('uv',{}).get('index',[]) if i.get('name')=='vllm-cpu'))")
echo "Parsed vllm-cpu URL: ${VLLM_CPU_URL}"
echo "VLLM_CPU_URL=${VLLM_CPU_URL}" >> $GITHUB_OUTPUT

- name: Get commit message
id: get_commit_message
run: |
Expand Down Expand Up @@ -284,12 +299,13 @@ jobs:
retry_on: any
shell: bash
command: |
VLLM_CPU_URL="${{ steps.parse_index_url.outputs.VLLM_CPU_URL }}"
uv pip install --system packaging setuptools wheel simphile pynvml huggingface_hub setuptools_scm fire pytest pytest-asyncio pytest-xdist
if [ -n "${{ inputs.optimum_rbln_version }}" ]; then
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --constraint <(echo "optimum-rbln==${{ inputs.optimum_rbln_version }}") --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
echo "optimum-rbln ${{ inputs.optimum_rbln_version }}"
else
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
uv pip install --system --force-reinstall --no-cache-dir dist/vllm_rbln*.whl --index-url http://pypi-cache.devpi.svc.cluster.local/root/pypi/+simple/ --extra-index-url "${VLLM_CPU_URL}" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
fi

- name: Run pytest
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ You can install this project using `pip` or from source.
#### Install via PyPI

```bash
pip install vllm-rbln --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu
pip install vllm-rbln --extra-index-url https://wheels.vllm.ai/0.18.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu
```

#### Or from source
Expand All @@ -46,7 +46,7 @@ uv pip install -e .
```bash
git clone https://github.com/rbln-sw/vllm-rbln.git
cd vllm-rbln
pip install -e . --extra-index-url https://wheels.vllm.ai/0.13.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu
pip install -e . --extra-index-url https://wheels.vllm.ai/0.18.0/cpu --extra-index-url https://download.pytorch.org/whl/cpu
```

### πŸ“š Documentation
Expand Down
Loading
Loading