Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions tests/llama_stack/core/test_llamastack_core.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import pytest

from tests.llama_stack.constants import LlamaStackProviders
from llama_stack_client import LlamaStackClient
from utilities.constants import MinIo, QWEN_MODEL_NAME
from ocp_resources.pod import Pod
from ocp_resources.secret import Secret


@pytest.mark.parametrize(
Expand All @@ -19,7 +22,9 @@
@pytest.mark.rawdeployment
@pytest.mark.smoke
class TestLlamaStackCore:
def test_lls_server_initial_state(self, minio_pod, minio_data_connection, llama_stack_client):
def test_lls_server_initial_state(
self, minio_pod: Pod, minio_data_connection: Secret, llama_stack_client: LlamaStackClient
) -> None:
models = llama_stack_client.models.list()
assert models is not None, "No models returned from LlamaStackClient"

Expand All @@ -36,13 +41,17 @@ def test_lls_server_initial_state(self, minio_pod, minio_data_connection, llama_
embedding_dimension = embedding_model.metadata["embedding_dimension"]
assert embedding_dimension is not None, "No embedding_dimension set in embedding model"

def test_model_register(self, minio_pod, minio_data_connection, llama_stack_client):
def test_model_register(
self, minio_pod: Pod, minio_data_connection: Secret, llama_stack_client: LlamaStackClient
) -> None:
response = llama_stack_client.models.register(
provider_id=LlamaStackProviders.Inference.VLLM_INFERENCE, model_type="llm", model_id=QWEN_MODEL_NAME
)
assert response

def test_model_list(self, minio_pod, minio_data_connection, llama_stack_client):
def test_model_list(
self, minio_pod: Pod, minio_data_connection: Secret, llama_stack_client: LlamaStackClient
) -> None:
models = llama_stack_client.models.list()

# We only need to check the first model;
Expand All @@ -52,7 +61,9 @@ def test_model_list(self, minio_pod, minio_data_connection, llama_stack_client):
assert models[0].model_type == "llm"
assert models[0].provider_id == LlamaStackProviders.Inference.VLLM_INFERENCE

def test_inference(self, minio_pod, minio_data_connection, llama_stack_client):
def test_inference(
self, minio_pod: Pod, minio_data_connection: Secret, llama_stack_client: LlamaStackClient
) -> None:
response = llama_stack_client.chat.completions.create(
model=QWEN_MODEL_NAME,
messages=[
Expand Down
29 changes: 10 additions & 19 deletions tests/llama_stack/rag/test_rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,16 @@
from llama_stack_client.types import EmbeddingsResponse, QueryChunksResponse
from llama_stack_client.types.vector_io_insert_params import Chunk
from simple_logger.logger import get_logger

from utilities.constants import MinIo, QWEN_MODEL_NAME
from utilities.rag_utils import TurnExpectation, validate_rag_agent_responses

LOGGER = get_logger(name=__name__)


@pytest.mark.parametrize(
"model_namespace, minio_pod, minio_data_connection, llama_stack_server_config",
"model_namespace",
[
pytest.param(
{"name": "test-llamastack-rag"},
MinIo.PodConfig.QWEN_HAP_BPIV2_MINIO_CONFIG,
{"bucket": "llms"},
{
"vllm_url_fixture": "qwen_isvc_url",
"inference_model": QWEN_MODEL_NAME,
},
)
],
indirect=True,
Expand All @@ -37,9 +29,7 @@ class TestLlamaStackRag:
"""

@pytest.mark.smoke
def test_rag_inference_embeddings(
self, minio_pod, minio_data_connection, llama_stack_client: LlamaStackClient
) -> None:
def test_rag_inference_embeddings(self, llama_stack_client: LlamaStackClient) -> None:
"""
Test embedding model functionality and vector generation.

Expand All @@ -61,9 +51,7 @@ def test_rag_inference_embeddings(
assert isinstance(embeddings_response.embeddings[0][0], float)

@pytest.mark.smoke
def test_rag_vector_io_ingestion_retrieval(
self, minio_pod, minio_data_connection, llama_stack_client: LlamaStackClient
) -> None:
def test_rag_vector_io_ingestion_retrieval(self, llama_stack_client: LlamaStackClient) -> None:
"""
Validates basic vector_db API in llama-stack using milvus

Expand Down Expand Up @@ -122,7 +110,7 @@ def test_rag_vector_io_ingestion_retrieval(
LOGGER.warning(f"Failed to unregister vector database {vector_db_id}: {e}")

@pytest.mark.smoke
def test_rag_simple_agent(self, minio_pod, minio_data_connection, llama_stack_client: LlamaStackClient) -> None:
def test_rag_simple_agent(self, llama_stack_client: LlamaStackClient) -> None:
"""
Test basic agent creation and conversation capabilities.

Expand Down Expand Up @@ -153,12 +141,15 @@ def test_rag_simple_agent(self, minio_pod, minio_data_connection, llama_stack_cl
session_id=s_id,
stream=False,
)
content = response.output_message.content
content = response.output_message.content.lower()
assert content is not None, "LLM response content is None"
assert "answers" in content, "The LLM didn't provide the expected answer to the prompt"
assert "answer" in content, "The LLM didn't provide the expected answer to the prompt"
assert "translate" in content, "The LLM didn't provide the expected answer to the prompt"
assert "summarize" in content, "The LLM didn't provide the expected answer to the prompt"
assert "chat" in content, "The LLM didn't provide the expected answer to the prompt"

@pytest.mark.smoke
def test_rag_build_rag_agent(self, minio_pod, minio_data_connection, llama_stack_client: LlamaStackClient) -> None:
def test_rag_build_rag_agent(self, llama_stack_client: LlamaStackClient) -> None:
"""
Test full RAG pipeline with vector database integration and knowledge retrieval.

Expand Down