vllm-project · tjohnson31415 · Jan 20, 2026 · Jan 21, 2026 · Jan 22, 2026 · Jan 27, 2026
@@ -179,8 +179,7 @@ jobs:
             uv.lock
             tests/**/*.py
             vllm_spyre/**/*.py
-            vllm_spyre/config/known_model_configs.json
-            vllm_spyre/config/supported_configs.yaml
+            vllm_spyre/config/model_configs.yaml
 
       - name: "Install PyTorch 2.7.1"
         if: steps.changed-src-files.outputs.any_changed == 'true'
@@ -207,7 +206,7 @@ jobs:
       - name: "Install vLLM with Spyre plugin"
         if: steps.changed-src-files.outputs.any_changed == 'true'
         run: |
-          uv venv .venv --system-site-packages
+          uv venv .venv --system-site-packages --clear
           source .venv/bin/activate
 
           # Syncs both the runtime and dev deps, based on the lockfile contents

@@ -45,13 +45,39 @@ configurations.
 [BAAI/BGE-Reranker (Large)]: https://huggingface.co/BAAI/bge-reranker-large
 [Multilingual-E5-large]: https://huggingface.co/intfloat/multilingual-e5-large
 
-## Runtime Validation
+## Model Configuration
 
-At runtime, the Spyre engine validates the requested model and configurations against the list
-of supported models and configurations based on the entries in the file
-<gh-file:vllm_spyre/config/supported_configs.yaml>. If a requested model or configuration
-is not found, a warning message will be logged.
+The Spyre engine uses a model registry to manage model-specific configurations. Model configurations
+are defined in <gh-file:vllm_spyre/config/model_configs.yaml> and include:
 
-```python
---8<-- "vllm_spyre/config/supported_configs.yaml:supported-model-runtime-configurations"
+- Architecture patterns for model matching
+- Device-specific configurations (environment variables, GPU block overrides)
+- Supported runtime configurations (static batching warmup shapes, continuous batching parameters)
+
+When a model is loaded, the registry automatically matches it to the appropriate configuration and
+applies model-specific settings.
+
+### Supported Configurations
+
+The following configurations are supported for each model:
+
+```yaml
+--8<-- "vllm_spyre/config/model_configs.yaml:supported-model-runtime-configuration"
 ```
+
+### Configuration Validation
+
+By default, the Spyre engine will log warnings if a requested model or configuration is not found
+in the registry. To enforce strict validation and fail if an unknown configuration is requested,
+set the environment variable:
+
+```bash
+export VLLM_SPYRE_REQUIRE_KNOWN_CONFIG=1
+```
+
+When this flag is enabled, the engine will raise a `RuntimeError` if:
+
+- The model cannot be matched to a known configuration
+- The requested runtime parameters are not in the supported configurations list
+
+See the [Configuration Guide](configuration.md) for more details on model configuration.
@@ -0,0 +1,3 @@
+"""Tests for vllm_spyre.config module."""
+
+# Made with Bob
@@ -0,0 +1,109 @@
+"""Shared fixtures for config tests."""
+
+import json
+from pathlib import Path
+from unittest.mock import Mock
+
+import pytest
+
+from vllm_spyre.config.model_registry import get_model_registry
+
+# Shared path to test fixtures
+FIXTURES_PATH = Path(__file__).parent.parent / "fixtures" / "model_configs"
+
+
+@pytest.fixture(scope="session")
+def registry():
+    """Fixture providing a registry loaded with real model_configs.yaml."""
+    return get_model_registry()
+
+
+def _load_hf_config(fixture_path: Path) -> Mock:
+    """Helper to load HF config from JSON and convert to Mock object."""
+    with open(fixture_path) as f:
+        config_dict = json.load(f)
+
+    hf_config = Mock()
+    for key, value in config_dict.items():
+        setattr(hf_config, key, value)
+    return hf_config
+
+
+@pytest.fixture
+def granite_3_3_hf_config():
+    """Fixture providing real granite-3.3-8b-instruct HF config."""
+    fixture_path = FIXTURES_PATH / "ibm-granite" / "granite-3.3-8b-instruct" / "config.json"
+    return _load_hf_config(fixture_path)
+
+
+@pytest.fixture
+def granite_4_hf_config():
+    """Fixture providing real granite-4-8b-dense HF config."""
+    fixture_path = FIXTURES_PATH / "ibm-granite" / "granite-4-8b-dense" / "config.json"
+    return _load_hf_config(fixture_path)
+
+
+@pytest.fixture
+def embedding_hf_config():
+    """Fixture providing real granite-embedding-125m-english HF config."""
+    fixture_path = FIXTURES_PATH / "ibm-granite" / "granite-embedding-125m-english" / "config.json"
+    return _load_hf_config(fixture_path)
+
+
+@pytest.fixture
+def micro_model_hf_config():
+    """Fixture providing real micro-g3.3-8b-instruct-1b HF config."""
+    fixture_path = FIXTURES_PATH / "ibm-ai-platform" / "micro-g3.3-8b-instruct-1b" / "config.json"
+    return _load_hf_config(fixture_path)
+
+
+def create_vllm_config(
+    hf_config=None,
+    world_size=1,
+    max_model_len=None,
+    max_num_seqs=None,
+    max_num_batched_tokens=None,
+    num_gpu_blocks_override=None,
+    model_path=None,
+):
+    """Create a mock vllm_config for testing.
+
+    Args:
+        hf_config: HF config object (Mock or real)
+        world_size: Tensor parallel size
+        max_model_len: Maximum model length
+        max_num_seqs: Max sequences (None for static batching)
+        max_num_batched_tokens: Max batched tokens
+        num_gpu_blocks_override: GPU blocks override value
+        model_path: Model path string
+
+    Returns:
+        Mock vllm_config with specified attributes
+    """
+    vllm_config = Mock()
+
+    # Model config
+    model_config_attrs = {}
+    if hf_config is not None:
+        model_config_attrs["hf_config"] = hf_config
+    if max_model_len is not None:
+        model_config_attrs["max_model_len"] = max_model_len
+    if model_path is not None:
+        model_config_attrs["model"] = model_path
+    vllm_config.model_config = Mock(**model_config_attrs)
+
+    # Parallel config
+    vllm_config.parallel_config = Mock(world_size=world_size)
+
+    # Scheduler config
+    vllm_config.scheduler_config = Mock(
+        max_num_seqs=max_num_seqs, max_num_batched_tokens=max_num_batched_tokens
+    )
+
+    # Cache config
+    vllm_config.cache_config = Mock(num_gpu_blocks_override=num_gpu_blocks_override)
+
+    return vllm_config
+
+
+# Made with Bob
@@ -0,0 +1,30 @@
+# Test fixture for error handling tests
+# Minimal model configuration for testing registry behavior
+
+models:
+  # Test model for error handling scenarios
+  test-granite-model:
+    architecture:
+      model_type: granite
+      num_hidden_layers: 40
+      max_position_embeddings: 131072
+      hidden_size: 4096
+      vocab_size: 49159
+      num_key_value_heads: 8
+      num_attention_heads: 32
+
+    # Continuous batching configurations
+    continuous_batching_configs:
+      - tp_size: 1
+        max_model_len: 8192
+        max_num_seqs: 4
+      - tp_size: 4
+        max_model_len: 32768
+        max_num_seqs: 32
+        device_config:
+          env_vars:
+            VLLM_DT_MAX_BATCH_TKV_LIMIT: 131072
+          num_gpu_blocks_override:
+            default: 8192
+
+# Made with Bob
@@ -0,0 +1,111 @@
+"""Tests for environment variable configuration of model_configs.yaml path."""
+
+import os
+import tempfile
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from vllm_spyre import envs
+from vllm_spyre.config.model_registry import ModelConfigRegistry
+
+
+@pytest.fixture
+def temp_config_file():
+    """Create a temporary config file for testing."""
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
+        f.write("""
+models:
+  env-test-model:
+    architecture:
+      model_type: test_env
+    continuous_batching_configs:
+      - tp_size: 1
+        max_model_len: 4096
+        max_num_seqs: 16
+""")
+        temp_path = f.name
+
+    yield temp_path
+    os.unlink(temp_path)
+
+
+@pytest.fixture(autouse=True)
+def clear_registry():
+    """Clear registry singleton between tests."""
+    ModelConfigRegistry._instance = None
+    ModelConfigRegistry._initialized = False
+    envs.clear_env_cache()
+    yield
+    ModelConfigRegistry._instance = None
+    ModelConfigRegistry._initialized = False
+    envs.clear_env_cache()
+
+
+class TestEnvVarConfigPath:
+    """Tests for VLLM_SPYRE_MODEL_CONFIG_FILE environment variable."""
+
+    def test_env_var_overrides_default(self, temp_config_file):
+        """Test that env var is used when no explicit path provided."""
+        with patch.dict(os.environ, {"VLLM_SPYRE_MODEL_CONFIG_FILE": temp_config_file}):
+            envs.clear_env_cache()
+            registry = ModelConfigRegistry.get_instance()
+            registry.initialize()
+
+            assert "env-test-model" in registry.list_models()
+
+    def test_explicit_path_takes_precedence(self, temp_config_file):
+        """Test priority order: explicit path > env var > default."""
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
+            f.write("""
+models:
+  explicit-model:
+    architecture:
+      model_type: test_explicit
+    continuous_batching_configs:
+      - tp_size: 1
+        max_model_len: 8192
+        max_num_seqs: 32
+""")
+            explicit_path = f.name
+
+        try:
+            with patch.dict(os.environ, {"VLLM_SPYRE_MODEL_CONFIG_FILE": temp_config_file}):
+                envs.clear_env_cache()
+                registry = ModelConfigRegistry.get_instance()
+                registry.initialize(config_path=Path(explicit_path))
+
+                models = registry.list_models()
+                assert "explicit-model" in models
+                assert "env-test-model" not in models
+        finally:
+            os.unlink(explicit_path)
+
+    def test_nonexistent_file_raises_error(self):
+        """Test that nonexistent file raises FileNotFoundError."""
+        with patch.dict(os.environ, {"VLLM_SPYRE_MODEL_CONFIG_FILE": "/tmp/nonexistent.yaml"}):
+            envs.clear_env_cache()
+            registry = ModelConfigRegistry.get_instance()
+
+            with pytest.raises(FileNotFoundError, match="Model configuration file not found"):
+                registry.initialize()
+
+    def test_empty_file_creates_empty_registry(self):
+        """Test that empty YAML file results in empty registry."""
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
+            f.write("")
+            empty_path = f.name
+
+        try:
+            with patch.dict(os.environ, {"VLLM_SPYRE_MODEL_CONFIG_FILE": empty_path}):
+                envs.clear_env_cache()
+                registry = ModelConfigRegistry.get_instance()
+                registry.initialize()
+
+                assert len(registry.list_models()) == 0
+        finally:
+            os.unlink(empty_path)
+
+
+# Made with Bob
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		"""Tests for vllm_spyre.config module."""

		# Made with Bob