Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -83,5 +83,6 @@ cufile.json
# uds tokenizer default tokenizer cache path
services/uds_tokenizer/tokenizers
services/uds_tokenizer/.venv
kv_connectors/llmd_fs_backend/.venv

**/vllm_source
24 changes: 23 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ BUILDER := $(shell command -v buildah >/dev/null 2>&1 && echo buildah || echo $(
UDS_TOKENIZER_IMAGE ?= llm-d-uds-tokenizer:e2e-test
FS_BACKEND_NAME ?= llmd-fs-backend
FS_BACKEND_DEV_IMG ?= $(IMAGE_TAG_BASE)/$(FS_BACKEND_NAME):$(DEV_VERSION)
# FS backend connector paths: source directory, CPU test directory, and the
# virtualenv (plus its bin/ directory) whose interpreter runs those tests.
# FS_BACKEND_CPU_TESTS uses ?= so a value already set by the caller
# (environment or make command line) is kept; the others are simply expanded.
FS_BACKEND_DIR := kv_connectors/llmd_fs_backend
FS_BACKEND_CPU_TESTS ?= $(FS_BACKEND_DIR)/tests/cpu
FS_BACKEND_VENV_DIR := $(FS_BACKEND_DIR)/.venv
FS_BACKEND_VENV_BIN := $(FS_BACKEND_VENV_DIR)/bin

# Go source files. Simply expanded (:=) so `find` runs once when the Makefile
# is parsed instead of being re-executed on every reference to $(SRC).
SRC := $(shell find . -type f -name '*.go')
Expand Down Expand Up @@ -65,13 +69,31 @@ clang:
test: unit-test e2e-test ## Run all tests (unit + e2e)

.PHONY: unit-test
unit-test: unit-test-uds ## Run unit tests
unit-test: unit-test-uds unit-test-fs-backend-cpu ## Run unit tests

# Runs the Go unit tests under ./pkg/... after the check-go and download-zmq
# prerequisites. The help text names Go explicitly so it is distinguishable
# from the aggregate `unit-test` target in `make help` output.
.PHONY: unit-test-uds
unit-test-uds: check-go download-zmq ## Run Go unit tests (./pkg/...)
	@printf "\033[33;1m==== Running unit tests ====\033[0m\n"
	@go test -v ./pkg/...

# Prepares the virtualenv used by the FS backend CPU tests.
#   1. If the venv interpreter is missing, create the venv with $(PYTHON_EXE)
#      and upgrade pip inside it. The steps are chained with && so a failed
#      venv creation stops immediately instead of falling through to pip and
#      reporting a misleading "pip: not found" style error.
#   2. Always (re)install the CPU test requirements, so changes to
#      requirements-cpu.txt are picked up even when the venv already exists.
# pip is invoked as `python -m pip`, which is the supported way to let pip
# upgrade itself and does not depend on the bin/pip wrapper script.
.PHONY: fs-backend-cpu-install-deps
fs-backend-cpu-install-deps: ## Set up venv and install FS backend CPU test dependencies
	@printf "\033[33;1m==== Setting up FS backend CPU test venv ====\033[0m\n"
	@if [ ! -f "$(FS_BACKEND_VENV_BIN)/python" ]; then \
		echo "Creating virtual environment in $(FS_BACKEND_VENV_DIR)..." && \
		$(PYTHON_EXE) -m venv "$(FS_BACKEND_VENV_DIR)" && \
		echo "Upgrading pip..." && \
		"$(FS_BACKEND_VENV_BIN)/python" -m pip install --upgrade pip > /dev/null; \
	else \
		echo "Virtual environment already exists"; \
	fi
	@"$(FS_BACKEND_VENV_BIN)/python" -m pip install -q -r "$(FS_BACKEND_DIR)/tests/requirements-cpu.txt"

# CPU-only pytest run for the FS backend. The prerequisite target prepares the
# virtualenv whose interpreter is invoked here; FS_BACKEND_CPU_TESTS selects
# the path(s) handed to pytest.
.PHONY: unit-test-fs-backend-cpu
unit-test-fs-backend-cpu: fs-backend-cpu-install-deps ## Run CPU-safe FS backend Python unit tests
	@printf "\033[33;1m==== Running CPU-safe FS backend unit tests ====\033[0m\n"
	@$(FS_BACKEND_VENV_BIN)/python -m pytest -q $(FS_BACKEND_CPU_TESTS)

.PHONY: unit-test-race
unit-test-race: check-go download-zmq ## Run unit tests with Go race detector enabled
@printf "\033[33;1m==== Running unit tests with race detector ====\033[0m\n"
Expand Down
28 changes: 23 additions & 5 deletions kv_connectors/llmd_fs_backend/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@
sys.path.insert(0, str(Path(__file__).parent))

import pytest
import torch
from vllm.config import VllmConfig, set_current_vllm_config


def pytest_addoption(parser):
Expand All @@ -35,20 +33,38 @@ def pytest_addoption(parser):
parser.addoption("--obj-ca_bundle", default=None)


@pytest.fixture(scope="session", autouse=True)
def require_cuda():
def pytest_configure(config):
    """Register the custom ``no_cuda_required`` marker.

    Appends a single entry to the ``markers`` ini setting of ``config`` via
    ``config.addinivalue_line``.
    """
    marker_description = (
        "no_cuda_required: mark a test as not requiring CUDA setup/teardown"
    )
    config.addinivalue_line("markers", marker_description)


@pytest.fixture(autouse=True)
def require_cuda(request):
    """Skip the requesting test when CUDA is not available.

    Applied automatically to every test. Tests carrying the
    ``no_cuda_required`` marker are left alone, and torch is not imported
    by this fixture for them.
    """
    opt_out = request.node.get_closest_marker("no_cuda_required")
    if not opt_out:
        # Imported lazily so marker-exempt tests never trigger the torch import here.
        import torch

        if not torch.cuda.is_available():
            pytest.skip("CUDA not available")


@pytest.fixture(autouse=True)
def cuda_teardown():
def cuda_teardown(request):
"""Ensure CUDA and C++ thread-pool resources from one test are fully
released before the next test starts. Without this, async destructors
can cause 'cudaErrorUnknown' or stale file-open errors in subsequent tests.
"""
if request.node.get_closest_marker("no_cuda_required"):
yield
return

yield
import torch

gc.collect() # force Python GC to call C++ destructors immediately
torch.cuda.synchronize() # surface any async CUDA errors in the right test
torch.cuda.empty_cache() # free cached allocations so next test starts clean
Expand All @@ -61,6 +77,8 @@ def default_vllm_config():
that use get_current_vllm_config() outside of a full engine context.
This matches vLLM's internal test fixture pattern.
"""
from vllm.config import VllmConfig, set_current_vllm_config

# Use empty VllmConfig() which provides sensible defaults
with set_current_vllm_config(VllmConfig()):
yield
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,11 @@
from pathlib import Path

import msgpack
import pytest

CONNECTOR_ROOT = Path(__file__).resolve().parents[1]
pytestmark = pytest.mark.no_cuda_required

CONNECTOR_ROOT = Path(__file__).resolve().parents[2]


class PrepareStoreOutput:
Expand Down
3 changes: 3 additions & 0 deletions kv_connectors/llmd_fs_backend/tests/requirements-cpu.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pytest
msgpack
pyzmq
Loading