Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 158 additions & 15 deletions tests/llama_stack/conftest.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
import os
import tempfile
from typing import Generator, Any, Dict

import portforward
import pytest
import requests
from _pytest.fixtures import FixtureRequest
from kubernetes.dynamic import DynamicClient
from llama_stack_client import LlamaStackClient
from llama_stack_client.types.vector_store import VectorStore
from ocp_resources.data_science_cluster import DataScienceCluster
from ocp_resources.deployment import Deployment
from ocp_resources.llama_stack_distribution import LlamaStackDistribution
from ocp_resources.namespace import Namespace
from simple_logger.logger import get_logger
from timeout_sampler import retry

from tests.llama_stack.utils import create_llama_stack_distribution, wait_for_llama_stack_client_ready
from utilities.constants import DscComponents, Timeout
from utilities.data_science_cluster_utils import update_components_in_dsc
from utilities.rag_utils import ModelInfo


LOGGER = get_logger(name=__name__)
Expand Down Expand Up @@ -43,19 +48,18 @@ def llama_stack_server_config(
vllm_api_token = os.getenv("LLS_CORE_VLLM_API_TOKEN", "")
vllm_url = os.getenv("LLS_CORE_VLLM_URL", "")

if hasattr(request, "param"):
if request.param.get("fms_orchestrator_url_fixture"):
fms_orchestrator_url = request.getfixturevalue(argname=request.param.get("fms_orchestrator_url_fixture"))
# Override env vars with request parameters if provided
params = getattr(request, "param", {}) or {}
if params.get("fms_orchestrator_url_fixture"):
fms_orchestrator_url = request.getfixturevalue(argname=params.get("fms_orchestrator_url_fixture"))
if params.get("inference_model"):
inference_model = params.get("inference_model") # type: ignore
if params.get("vllm_api_token"):
vllm_api_token = params.get("vllm_api_token") # type: ignore
if params.get("vllm_url_fixture"):
vllm_url = request.getfixturevalue(argname=params.get("vllm_url_fixture"))

# Override env vars with request parameters if provided
if request.param.get("inference_model"):
inference_model = request.param.get("inference_model")
if request.param.get("vllm_api_token"):
vllm_api_token = request.param.get("vllm_api_token")
if request.param.get("vllm_url_fixture"):
vllm_url = request.getfixturevalue(argname=request.param.get("vllm_url_fixture"))

return {
server_config: Dict[str, Any] = {
"containerSpec": {
"resources": {
"requests": {"cpu": "250m", "memory": "500Mi"},
Expand Down Expand Up @@ -85,11 +89,14 @@ def llama_stack_server_config(
"port": 8321,
},
"distribution": {"name": "rh-dev"},
"storage": {
"size": "20Gi",
},
}

if params.get("llama_stack_storage_size"):
storage_size = params.get("llama_stack_storage_size")
server_config["storage"] = {"size": storage_size}

return server_config


@pytest.fixture(scope="class")
def llama_stack_distribution(
Expand Down Expand Up @@ -157,3 +164,139 @@ def llama_stack_client(
except Exception as e:
LOGGER.error(f"Failed to set up port forwarding: {e}")
raise


@pytest.fixture(scope="class")
def llama_stack_models(llama_stack_client: LlamaStackClient) -> ModelInfo:
    """
    Collects model information exposed by the LlamaStack server.

    Queries the client's model listing and extracts:
    - model_id: identifier of the first model whose api_model_type is "llm"
    - embedding_model: the first model object whose api_model_type is "embedding"
    - embedding_dimension: the "embedding_dimension" entry of that model's metadata

    Args:
        llama_stack_client: The configured LlamaStackClient

    Returns:
        ModelInfo: NamedTuple containing model information
    """
    available = llama_stack_client.models.list()

    llm = next(model for model in available if model.api_model_type == "llm")
    embedding = next(model for model in available if model.api_model_type == "embedding")

    return ModelInfo(
        model_id=llm.identifier,
        embedding_model=embedding,
        embedding_dimension=embedding.metadata["embedding_dimension"],
    )


@pytest.fixture(scope="class")
def vector_store(
llama_stack_client: LlamaStackClient, llama_stack_models: ModelInfo
) -> Generator[VectorStore, None, None]:
"""
Creates a vector store for testing and automatically cleans it up.

This fixture creates a vector store, yields it to the test,
and ensures it's deleted after the test completes (whether it passes or fails).

Args:
llama_stack_client: The configured LlamaStackClient
llama_stack_models: Model information including embedding model details

Yields:
Vector store object that can be used in tests
"""
# Setup
vector_store = llama_stack_client.vector_stores.create(
name="test_vector_store",
embedding_model=llama_stack_models.embedding_model.identifier, # type: ignore
embedding_dimension=llama_stack_models.embedding_dimension,
)
Comment on lines +212 to +216
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Use string identifier returned by ModelInfo.

After aligning ModelInfo, pass the identifier directly (no .identifier).

-    vector_store = llama_stack_client.vector_stores.create(
-        name="test_vector_store",
-        embedding_model=llama_stack_models.embedding_model.identifier,  # type: ignore
-        embedding_dimension=llama_stack_models.embedding_dimension,
-    )
+    vector_store = llama_stack_client.vector_stores.create(
+        name="test_vector_store",
+        embedding_model=llama_stack_models.embedding_model,
+        embedding_dimension=llama_stack_models.embedding_dimension,
+    )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
vector_store = llama_stack_client.vector_stores.create(
name="test_vector_store",
embedding_model=llama_stack_models.embedding_model.identifier, # type: ignore
embedding_dimension=llama_stack_models.embedding_dimension,
)
vector_store = llama_stack_client.vector_stores.create(
name="test_vector_store",
embedding_model=llama_stack_models.embedding_model,
embedding_dimension=llama_stack_models.embedding_dimension,
)
🤖 Prompt for AI Agents
In tests/llama_stack/conftest.py around lines 257 to 261, the vector_store
creation currently passes llama_stack_models.embedding_model.identifier; update
it to pass the ModelInfo string directly by using
llama_stack_models.embedding_model (remove the .identifier) and drop the
unnecessary "# type: ignore" so embedding_model receives the ModelInfo string
identifier as expected.


yield vector_store

try:
llama_stack_client.vector_stores.delete(vector_store_id=vector_store.id)
LOGGER.info(f"Deleted vector store {vector_store.id}")
except Exception as e:
LOGGER.warning(f"Failed to delete vector store {vector_store.id}: {e}")


@retry(
    wait_timeout=Timeout.TIMEOUT_1MIN,
    sleep=5,
    exceptions_dict={requests.exceptions.RequestException: [], Exception: []},
)
def _download_and_upload_file(url: str, llama_stack_client: LlamaStackClient, vector_store: Any) -> bool:
    """
    Downloads a file from URL and uploads it to the vector store.

    The whole operation is retried (via @retry) on any exception for up to
    one minute, sleeping 5s between attempts.

    Args:
        url: The URL to download the file from
        llama_stack_client: The configured LlamaStackClient
        vector_store: The vector store to upload the file to

    Returns:
        bool: True if successful, raises exception if failed

    Raises:
        requests.exceptions.RequestException: if the download fails.
        Exception: propagated from the upload / vector-store attach calls.
    """
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()

        # Rename .rst -> .txt before upload; per the original author this was
        # needed in some environments though not locally — reason unconfirmed.
        file_name = url.split("/")[-1]
        local_file_name = file_name.replace(".rst", ".txt")

        # delete=False so the file still exists when reopened below.
        # Bug fix: the previous code used the default delete=True and reopened
        # temp_file.name AFTER the NamedTemporaryFile context exited, at which
        # point the file had already been removed from disk.
        temp_file = tempfile.NamedTemporaryFile(mode="wb", suffix=f"_{local_file_name}", delete=False)
        try:
            temp_file.write(response.content)
            temp_file.close()

            # Upload saved file to LlamaStack
            with open(temp_file.name, "rb") as file_to_upload:
                uploaded_file = llama_stack_client.files.create(file=file_to_upload, purpose="assistants")
        finally:
            # Clean up the temp file ourselves since delete=False was used.
            os.unlink(temp_file.name)

        # Add file to vector store
        llama_stack_client.vector_stores.files.create(vector_store_id=vector_store.id, file_id=uploaded_file.id)

        return True

    except Exception as e:
        # RequestException is a subclass of Exception, so a single handler
        # covers both cases the original tuple listed.
        LOGGER.warning(f"Failed to download and upload file {url}: {e}")
        raise


@pytest.fixture(scope="class")
def vector_store_with_docs(llama_stack_client: LlamaStackClient, vector_store: Any) -> Generator[Any, None, None]:
    """
    Creates a vector store with TorchTune documentation files uploaded.

    Builds on the vector_store fixture: each TorchTune tutorial file is
    downloaded from the pinned v0.6.1 tag and attached to the store before
    the store is yielded to the test. Cleanup is handled by the underlying
    vector_store fixture.

    Args:
        llama_stack_client: The configured LlamaStackClient
        vector_store: The vector store fixture to upload files to

    Yields:
        Vector store object with uploaded TorchTune documentation files
    """
    base_url = "https://raw.githubusercontent.com/pytorch/torchtune/refs/tags/v0.6.1/docs/source/tutorials/"

    # TorchTune tutorial pages used as the RAG corpus.
    doc_files = (
        "llama3.rst",
        "chat.rst",
        "lora_finetune.rst",
        "qat_finetune.rst",
        "memory_optimizations.rst",
    )

    for doc_file in doc_files:
        _download_and_upload_file(
            url=f"{base_url}{doc_file}",
            llama_stack_client=llama_stack_client,
            vector_store=vector_store,
        )

    yield vector_store
53 changes: 53 additions & 0 deletions tests/llama_stack/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from dataclasses import dataclass
from enum import Enum
from typing import List


class LlamaStackProviders:
Expand All @@ -12,3 +14,54 @@ class Safety(str, Enum):

class Eval(str, Enum):
TRUSTYAI_LMEVAL = "trustyai_lmeval"


@dataclass
class TorchTuneTestExpectation:
    """Test expectation for TorchTune documentation questions.

    Pairs a natural-language question about the TorchTune docs with the
    keywords a correct answer is expected to contain.
    """

    # The question sent to the model under test.
    question: str
    # Keywords expected to appear in a correct answer.
    expected_keywords: List[str]
    # Human-readable summary of what this expectation verifies.
    description: str


# Canned question/keyword expectations exercised against the TorchTune
# documentation corpus (see the vector_store_with_docs fixture in conftest.py —
# TODO confirm the consuming tests). Keyword matching is presumably
# case-insensitive in the consumer; verify before relying on casing here.
TORCHTUNE_TEST_EXPECTATIONS: List[TorchTuneTestExpectation] = [
    TorchTuneTestExpectation(
        question="what is torchtune",
        expected_keywords=["torchtune", "pytorch", "fine-tuning", "training", "model"],
        description="Should provide information about torchtune framework",
    ),
    TorchTuneTestExpectation(
        question="What do you know about LoRA?",
        expected_keywords=[
            "LoRA",
            "parameter",
            "efficient",
            "fine-tuning",
            "reduce",
        ],
        description="Should provide information about LoRA (Low Rank Adaptation)",
    ),
    TorchTuneTestExpectation(
        question="How can I optimize model training for quantization?",
        expected_keywords=[
            "Quantization-Aware Training",
            "QAT",
            "training",
            "fine-tuning",
            "fake",
            "quantized",
        ],
        description="Should provide information about QAT (Quantization-Aware Training)",
    ),
    TorchTuneTestExpectation(
        question="Are there any memory optimizations for LoRA?",
        expected_keywords=["QLoRA", "fine-tuning", "4-bit", "Optimization", "LoRA"],
        description="Should provide information about QLoRA",
    ),
    TorchTuneTestExpectation(
        question="tell me about dora",
        expected_keywords=["dora", "parameter", "magnitude", "direction", "fine-tuning"],
        description="Should provide information about DoRA (Weight-Decomposed Low-Rank Adaptation)",
    ),
]
6 changes: 5 additions & 1 deletion tests/llama_stack/core/test_llamastack_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@
{"name": "test-llamastack-core"},
MinIo.PodConfig.QWEN_HAP_BPIV2_MINIO_CONFIG,
{"bucket": "llms"},
{"vllm_url_fixture": "qwen_isvc_url", "inference_model": QWEN_MODEL_NAME},
{
"vllm_url_fixture": "qwen_isvc_url",
"inference_model": QWEN_MODEL_NAME,
"llama_stack_storage_size": "10Gi",
},
)
],
indirect=True,
Expand Down
Loading