# feat(backend): add vllm support #513
# Workflow file for this run
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# CI workflow: on pull requests targeting `main`, install the package with
# its `mac` and `dev` extras, run the pytest suite with coverage, upload the
# coverage report to Codecov, and finish with an end-to-end request against a
# locally launched server.
name: CI

on:
  pull_request:
    branches: [main]

jobs:
  test:
    runs-on: macos-15
    strategy:
      matrix:
        # Quoted so the version is read as a string, not a float.
        python-version: ['3.11']

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      # The cache key includes the hash of pyproject.toml, so a change to
      # that file misses the exact key and falls back to the broader
      # restore-keys prefixes below.
      - name: Cache pip dependencies
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-pip-${{ matrix.python-version }}-
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e '.[mac, dev]'

      - name: Run tests
        run: pytest tests/ -v --cov=src/parallax --cov-report=xml

      # Coverage upload is best-effort: an upload error does not fail CI.
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml
          fail_ci_if_error: false

      # Launch the server in the background, wait a fixed 60 seconds, send
      # one chat-completion request, then stop the server.
      - name: Run E2E tests
        run: |
          python src/parallax/launch.py \
            --model-path Qwen/Qwen3-0.6B-MLX-bf16 \
            --max-num-tokens-per-batch 16384 \
            --kv-block-size 1024 \
            --max-batch-size 128 \
            --start-layer 0 \
            --end-layer 28 &
          PID=$!
          # Stop the background server on every exit path. The default
          # GitHub Actions bash shell runs with -e, so a failing curl aborts
          # the script before the explicit kill below is reached.
          trap 'kill "$PID" 2>/dev/null || true' EXIT
          # Fixed start-up allowance for the server launched above.
          sleep 60
          # --fail makes curl exit non-zero on HTTP 4xx/5xx responses, so an
          # error response fails the step instead of passing silently.
          # NOTE(review): assumes the server launched above listens on
          # port 3000 - confirm against launch.py defaults.
          curl --fail --location 'http://localhost:3000/v1/chat/completions' \
            --header 'Content-Type: application/json' \
            --data '{
              "messages": [
                {
                  "role": "user",
                  "content": "What is the capital of France"
                }
              ],
              "stream": false,
              "max_tokens": 1024,
              "chat_template_kwargs": {"enable_thinking": false},
              "sampling_params": {
                "top_k": 3
              }
            }'
          kill "$PID" 2>/dev/null || true
          # wait returns non-zero for a process ended by the kill above; the
          # echo keeps the step's final exit status at zero in that case.
          wait "$PID" 2>/dev/null || echo -e "\nE2E test completed after 60 seconds"