Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
c9fc619
:robot: intiial model configuration refactor by Bob
tjohnson31415 Jan 20, 2026
8a39277
additional changes for new model config registry
tjohnson31415 Jan 21, 2026
e47abb6
refactor: separate static and continuous configs and some other cleanup
tjohnson31415 Jan 22, 2026
9c5532b
test: 100% code coverage of vllm_spyre/config courtesy of Bob
tjohnson31415 Jan 27, 2026
dd6414a
refactor: update tracking and summarization log from configurator
tjohnson31415 Jan 27, 2026
4acff78
update and delint config module README
tjohnson31415 Jan 28, 2026
12b71b3
fix gha test workflow
tjohnson31415 Jan 28, 2026
35e23a5
tests: config tests should use public methods
tjohnson31415 Jan 28, 2026
9fd8b29
review: simplfying and clarifying changes from Bob review
tjohnson31415 Jan 28, 2026
b8d7084
refactor: give warmup shapes a dataclass
tjohnson31415 Jan 29, 2026
6c0015e
markdownlint supported_models.py
tjohnson31415 Jan 29, 2026
1712f34
:fire: remove max_num_batched_tokens from model registry
tjohnson31415 Jan 30, 2026
0321d52
test: tweak to cleanup fixture
tjohnson31415 Jan 30, 2026
6c8169a
test: fix fixtures in test_integration
tjohnson31415 Jan 30, 2026
297df5a
fix: add complexity score to correctly match FP8 models from the regi…
tjohnson31415 Jan 30, 2026
216b5fd
update templates / references in model_configs.yamll
tjohnson31415 Jan 30, 2026
fa89b0a
remove test/models/test_granite.py again...
tjohnson31415 Feb 2, 2026
7e8998f
test: refactor fixtures in test_error_handling
tjohnson31415 Feb 2, 2026
9efeffe
more test cleanup
tjohnson31415 Feb 2, 2026
a2b8858
fix: check static batching config first in registry lookup
tjohnson31415 Feb 2, 2026
e1d29e3
fix: add TP check to granite chunk size override/default
tjohnson31415 Feb 4, 2026
85fe58a
review: rename ConfigValue fields to default and applied
tjohnson31415 Feb 4, 2026
41f28e7
review: reduce test overlap in test_integration and use more fixtures
tjohnson31415 Feb 5, 2026
f1f115d
Merge branch 'main' into model-configurator
tjohnson31415 Feb 5, 2026
6d4e9a3
refactor: remove complexity_score in favor of field count and don't a…
tjohnson31415 Feb 5, 2026
a3fedf8
feat: add support for VLLM_SPYRE_MODEL_CONFIG_FILE
tjohnson31415 Feb 5, 2026
9d62737
fixup: manual formatting fix
tjohnson31415 Feb 5, 2026
b6db838
try adding --clear to uv venv in GHA test workflow
tjohnson31415 Feb 5, 2026
5cd255f
fmt: markdownlint README.md update
tjohnson31415 Feb 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,7 @@ jobs:
uv.lock
tests/**/*.py
vllm_spyre/**/*.py
vllm_spyre/config/known_model_configs.json
vllm_spyre/config/supported_configs.yaml
vllm_spyre/config/model_configs.yaml

- name: "Install PyTorch 2.7.1"
if: steps.changed-src-files.outputs.any_changed == 'true'
Expand All @@ -207,7 +206,7 @@ jobs:
- name: "Install vLLM with Spyre plugin"
if: steps.changed-src-files.outputs.any_changed == 'true'
run: |
uv venv .venv --system-site-packages
uv venv .venv --system-site-packages --clear
source .venv/bin/activate

# Syncs both the runtime and dev deps, based on the lockfile contents
Expand Down
40 changes: 33 additions & 7 deletions docs/user_guide/supported_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,39 @@ configurations.
[BAAI/BGE-Reranker (Large)]: https://huggingface.co/BAAI/bge-reranker-large
[Multilingual-E5-large]: https://huggingface.co/intfloat/multilingual-e5-large

## Runtime Validation
## Model Configuration

At runtime, the Spyre engine validates the requested model and configurations against the list
of supported models and configurations based on the entries in the file
<gh-file:vllm_spyre/config/supported_configs.yaml>. If a requested model or configuration
is not found, a warning message will be logged.
The Spyre engine uses a model registry to manage model-specific configurations. Model configurations
are defined in <gh-file:vllm_spyre/config/model_configs.yaml> and include:

```python
--8<-- "vllm_spyre/config/supported_configs.yaml:supported-model-runtime-configurations"
- Architecture patterns for model matching
- Device-specific configurations (environment variables, GPU block overrides)
- Supported runtime configurations (static batching warmup shapes, continuous batching parameters)

When a model is loaded, the registry automatically matches it to the appropriate configuration and
applies model-specific settings.

### Supported Configurations

The following configurations are supported for each model:

```yaml
--8<-- "vllm_spyre/config/model_configs.yaml:supported-model-runtime-configuration"
```

### Configuration Validation

By default, the Spyre engine will log warnings if a requested model or configuration is not found
in the registry. To enforce strict validation and fail if an unknown configuration is requested,
set the environment variable:

```bash
export VLLM_SPYRE_REQUIRE_KNOWN_CONFIG=1
```

When this flag is enabled, the engine will raise a `RuntimeError` if:

- The model cannot be matched to a known configuration
- The requested runtime parameters are not in the supported configurations list

See the [Configuration Guide](configuration.md) for more details on model configuration.
3 changes: 3 additions & 0 deletions tests/config/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""Tests for vllm_spyre.config module."""

# Made with Bob
109 changes: 109 additions & 0 deletions tests/config/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
"""Shared fixtures for config tests."""

import json
from pathlib import Path
from unittest.mock import Mock

import pytest

from vllm_spyre.config.model_registry import get_model_registry

# Shared path to test fixtures: the "fixtures/model_configs" directory located
# under the parent of the directory that contains this file.
FIXTURES_PATH = Path(__file__).parent.parent / "fixtures" / "model_configs"


@pytest.fixture(scope="session")
def registry():
    """Provide the model registry (loaded with the real model_configs.yaml).

    Session-scoped: the object returned by ``get_model_registry()`` is
    obtained once and shared by every test that requests this fixture.
    """
    model_registry = get_model_registry()
    return model_registry


def _load_hf_config(fixture_path: Path) -> Mock:
    """Load a HF config from JSON and expose its top-level keys as attributes.

    Args:
        fixture_path: Path to a JSON file whose top-level value is an object.

    Returns:
        A ``Mock`` with one attribute set per top-level key of the JSON object.
        Attributes that are not present in the file are not set explicitly and
        therefore keep the default ``Mock`` behaviour.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; do not rely on the platform's default encoding.
    with open(fixture_path, encoding="utf-8") as f:
        config_dict = json.load(f)

    hf_config = Mock()
    # Use setattr instead of Mock(**config_dict) so that keys which collide
    # with Mock constructor arguments (e.g. "name") become plain attributes.
    for key, value in config_dict.items():
        setattr(hf_config, key, value)
    return hf_config


@pytest.fixture
def granite_3_3_hf_config():
    """Provide the HF config built from the granite-3.3-8b-instruct fixture."""
    model_dir = FIXTURES_PATH / "ibm-granite" / "granite-3.3-8b-instruct"
    return _load_hf_config(model_dir / "config.json")


@pytest.fixture
def granite_4_hf_config():
    """Provide the HF config built from the granite-4-8b-dense fixture."""
    model_dir = FIXTURES_PATH / "ibm-granite" / "granite-4-8b-dense"
    return _load_hf_config(model_dir / "config.json")


@pytest.fixture
def embedding_hf_config():
    """Provide the HF config built from the granite-embedding-125m-english fixture."""
    model_dir = FIXTURES_PATH / "ibm-granite" / "granite-embedding-125m-english"
    return _load_hf_config(model_dir / "config.json")


@pytest.fixture
def micro_model_hf_config():
    """Provide the HF config built from the micro-g3.3-8b-instruct-1b fixture."""
    model_dir = FIXTURES_PATH / "ibm-ai-platform" / "micro-g3.3-8b-instruct-1b"
    return _load_hf_config(model_dir / "config.json")


def create_vllm_config(
    hf_config=None,
    world_size=1,
    max_model_len=None,
    max_num_seqs=None,
    max_num_batched_tokens=None,
    num_gpu_blocks_override=None,
    model_path=None,
):
    """Assemble a ``Mock`` that stands in for a vllm_config in tests.

    Args:
        hf_config: HF config object (Mock or real); set on ``model_config``
            only when not None.
        world_size: Value stored as ``parallel_config.world_size``.
        max_model_len: Set as ``model_config.max_model_len`` only when not None.
        max_num_seqs: Stored as ``scheduler_config.max_num_seqs`` (None for
            static batching).
        max_num_batched_tokens: Stored as
            ``scheduler_config.max_num_batched_tokens``.
        num_gpu_blocks_override: Stored as
            ``cache_config.num_gpu_blocks_override``.
        model_path: Set as ``model_config.model`` only when not None.

    Returns:
        Mock with ``model_config``, ``parallel_config``, ``scheduler_config``
        and ``cache_config`` attributes populated as described above.
    """
    # Model settings are pinned only when supplied; omitted ones are left to
    # the default Mock attribute behaviour.
    optional_model_attrs = {
        "hf_config": hf_config,
        "max_model_len": max_model_len,
        "model": model_path,
    }
    supplied_model_attrs = {
        attr_name: attr_value
        for attr_name, attr_value in optional_model_attrs.items()
        if attr_value is not None
    }

    # Unlike the model settings, these values are always set, even when None.
    scheduler_attrs = {
        "max_num_seqs": max_num_seqs,
        "max_num_batched_tokens": max_num_batched_tokens,
    }

    config = Mock()
    config.model_config = Mock(**supplied_model_attrs)
    config.parallel_config = Mock(world_size=world_size)
    config.scheduler_config = Mock(**scheduler_attrs)
    config.cache_config = Mock(num_gpu_blocks_override=num_gpu_blocks_override)
    return config


# Made with Bob
30 changes: 30 additions & 0 deletions tests/config/fixtures/test_error_handling_models.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Test fixture for error handling tests
# Minimal model configuration for testing registry behavior

models:
# Test model for error handling scenarios
test-granite-model:
architecture:
model_type: granite
num_hidden_layers: 40
max_position_embeddings: 131072
hidden_size: 4096
vocab_size: 49159
num_key_value_heads: 8
num_attention_heads: 32

# Continuous batching configurations
continuous_batching_configs:
- tp_size: 1
max_model_len: 8192
max_num_seqs: 4
- tp_size: 4
max_model_len: 32768
max_num_seqs: 32
device_config:
env_vars:
VLLM_DT_MAX_BATCH_TKV_LIMIT: 131072
num_gpu_blocks_override:
default: 8192

# Made with Bob
111 changes: 111 additions & 0 deletions tests/config/test_env_config_path.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
"""Tests for environment variable configuration of model_configs.yaml path."""

import os
import tempfile
from pathlib import Path
from unittest.mock import patch

import pytest

from vllm_spyre import envs
from vllm_spyre.config.model_registry import ModelConfigRegistry


@pytest.fixture
def temp_config_file(tmp_path):
    """Create a temporary config file for testing.

    The file is written inside pytest's per-test ``tmp_path`` directory, so no
    manual cleanup is needed and a failure while writing cannot leave a stray
    file behind.

    Returns:
        The path of the written YAML file as a string (suitable for use as an
        environment variable value).
    """
    config_path = tmp_path / "model_configs.yaml"
    config_path.write_text(
        """
models:
  env-test-model:
    architecture:
      model_type: test_env
    continuous_batching_configs:
      - tp_size: 1
        max_model_len: 4096
        max_num_seqs: 16
""",
        encoding="utf-8",
    )
    return str(config_path)


@pytest.fixture(autouse=True)
def clear_registry():
    """Reset the registry singleton and the env cache around every test."""

    def _reset_state():
        # Drop the cached singleton, mark it uninitialized, and clear cached
        # environment values.
        ModelConfigRegistry._instance = None
        ModelConfigRegistry._initialized = False
        envs.clear_env_cache()

    _reset_state()
    yield
    _reset_state()


class TestEnvVarConfigPath:
    """Tests for VLLM_SPYRE_MODEL_CONFIG_FILE environment variable.

    Files created by individual tests live in pytest's per-test ``tmp_path``
    directory, so they need no manual cleanup and never collide with files
    that already exist on the machine.
    """

    def test_env_var_overrides_default(self, temp_config_file):
        """Test that env var is used when no explicit path provided."""
        with patch.dict(os.environ, {"VLLM_SPYRE_MODEL_CONFIG_FILE": temp_config_file}):
            # The env module caches values; clear it so the patched value is read.
            envs.clear_env_cache()
            registry = ModelConfigRegistry.get_instance()
            registry.initialize()

            assert "env-test-model" in registry.list_models()

    def test_explicit_path_takes_precedence(self, temp_config_file, tmp_path):
        """Test priority order: explicit path > env var > default."""
        explicit_path = tmp_path / "explicit_model_configs.yaml"
        explicit_path.write_text(
            """
models:
  explicit-model:
    architecture:
      model_type: test_explicit
    continuous_batching_configs:
      - tp_size: 1
        max_model_len: 8192
        max_num_seqs: 32
""",
            encoding="utf-8",
        )

        with patch.dict(os.environ, {"VLLM_SPYRE_MODEL_CONFIG_FILE": temp_config_file}):
            envs.clear_env_cache()
            registry = ModelConfigRegistry.get_instance()
            registry.initialize(config_path=explicit_path)

            models = registry.list_models()
            assert "explicit-model" in models
            assert "env-test-model" not in models

    def test_nonexistent_file_raises_error(self, tmp_path):
        """Test that nonexistent file raises FileNotFoundError."""
        # A path inside the fresh per-test directory is guaranteed not to
        # exist, unlike a hard-coded location such as /tmp/nonexistent.yaml.
        missing_path = tmp_path / "nonexistent.yaml"

        with patch.dict(os.environ, {"VLLM_SPYRE_MODEL_CONFIG_FILE": str(missing_path)}):
            envs.clear_env_cache()
            registry = ModelConfigRegistry.get_instance()

            with pytest.raises(FileNotFoundError, match="Model configuration file not found"):
                registry.initialize()

    def test_empty_file_creates_empty_registry(self, tmp_path):
        """Test that empty YAML file results in empty registry."""
        empty_path = tmp_path / "empty.yaml"
        empty_path.write_text("", encoding="utf-8")

        with patch.dict(os.environ, {"VLLM_SPYRE_MODEL_CONFIG_FILE": str(empty_path)}):
            envs.clear_env_cache()
            registry = ModelConfigRegistry.get_instance()
            registry.initialize()

            assert len(registry.list_models()) == 0


# Made with Bob
Loading