opendatahub-io · jgarciao · Sep 9, 2025 · Sep 8, 2025 · Sep 8, 2025 · Sep 8, 2025
@@ -1,7 +1,10 @@
 import pytest
 
 from tests.llama_stack.constants import LlamaStackProviders
+from llama_stack_client import LlamaStackClient
 from utilities.constants import MinIo, QWEN_MODEL_NAME
+from ocp_resources.pod import Pod
+from ocp_resources.secret import Secret
 
 
 @pytest.mark.parametrize(
@@ -19,7 +22,9 @@
 @pytest.mark.rawdeployment
 @pytest.mark.smoke
 class TestLlamaStackCore:
-    def test_lls_server_initial_state(self, minio_pod, minio_data_connection, llama_stack_client):
+    def test_lls_server_initial_state(
+        self, minio_pod: Pod, minio_data_connection: Secret, llama_stack_client: LlamaStackClient
+    ) -> None:
         models = llama_stack_client.models.list()
         assert models is not None, "No models returned from LlamaStackClient"
 
@@ -36,13 +41,17 @@ def test_lls_server_initial_state(self, minio_pod, minio_data_connection, llama_
         embedding_dimension = embedding_model.metadata["embedding_dimension"]
         assert embedding_dimension is not None, "No embedding_dimension set in embedding model"
 
-    def test_model_register(self, minio_pod, minio_data_connection, llama_stack_client):
+    def test_model_register(
+        self, minio_pod: Pod, minio_data_connection: Secret, llama_stack_client: LlamaStackClient
+    ) -> None:
         response = llama_stack_client.models.register(
             provider_id=LlamaStackProviders.Inference.VLLM_INFERENCE, model_type="llm", model_id=QWEN_MODEL_NAME
         )
         assert response
 
-    def test_model_list(self, minio_pod, minio_data_connection, llama_stack_client):
+    def test_model_list(
+        self, minio_pod: Pod, minio_data_connection: Secret, llama_stack_client: LlamaStackClient
+    ) -> None:
         models = llama_stack_client.models.list()
 
         # We only need to check the first model;
@@ -52,7 +61,9 @@ def test_model_list(self, minio_pod, minio_data_connection, llama_stack_client):
         assert models[0].model_type == "llm"
         assert models[0].provider_id == LlamaStackProviders.Inference.VLLM_INFERENCE
 
-    def test_inference(self, minio_pod, minio_data_connection, llama_stack_client):
+    def test_inference(
+        self, minio_pod: Pod, minio_data_connection: Secret, llama_stack_client: LlamaStackClient
+    ) -> None:
         response = llama_stack_client.chat.completions.create(
             model=QWEN_MODEL_NAME,
             messages=[

@@ -6,24 +6,16 @@
 from llama_stack_client.types import EmbeddingsResponse, QueryChunksResponse
 from llama_stack_client.types.vector_io_insert_params import Chunk
 from simple_logger.logger import get_logger
-
-from utilities.constants import MinIo, QWEN_MODEL_NAME
 from utilities.rag_utils import TurnExpectation, validate_rag_agent_responses
 
 LOGGER = get_logger(name=__name__)
 
 
 @pytest.mark.parametrize(
-    "model_namespace, minio_pod, minio_data_connection, llama_stack_server_config",
+    "model_namespace",
     [
         pytest.param(
             {"name": "test-llamastack-rag"},
-            MinIo.PodConfig.QWEN_HAP_BPIV2_MINIO_CONFIG,
-            {"bucket": "llms"},
-            {
-                "vllm_url_fixture": "qwen_isvc_url",
-                "inference_model": QWEN_MODEL_NAME,
-            },
         )
     ],
     indirect=True,
@@ -37,9 +29,7 @@ class TestLlamaStackRag:
     """
 
     @pytest.mark.smoke
-    def test_rag_inference_embeddings(
-        self, minio_pod, minio_data_connection, llama_stack_client: LlamaStackClient
-    ) -> None:
+    def test_rag_inference_embeddings(self, llama_stack_client: LlamaStackClient) -> None:
         """
         Test embedding model functionality and vector generation.
 
@@ -61,9 +51,7 @@ def test_rag_inference_embeddings(
         assert isinstance(embeddings_response.embeddings[0][0], float)
 
     @pytest.mark.smoke
-    def test_rag_vector_io_ingestion_retrieval(
-        self, minio_pod, minio_data_connection, llama_stack_client: LlamaStackClient
-    ) -> None:
+    def test_rag_vector_io_ingestion_retrieval(self, llama_stack_client: LlamaStackClient) -> None:
         """
         Validates basic vector_db API in llama-stack using milvus
 
@@ -122,7 +110,7 @@ def test_rag_vector_io_ingestion_retrieval(
                 LOGGER.warning(f"Failed to unregister vector database {vector_db_id}: {e}")
 
     @pytest.mark.smoke
-    def test_rag_simple_agent(self, minio_pod, minio_data_connection, llama_stack_client: LlamaStackClient) -> None:
+    def test_rag_simple_agent(self, llama_stack_client: LlamaStackClient) -> None:
         """
         Test basic agent creation and conversation capabilities.
 
@@ -153,12 +141,15 @@ def test_rag_simple_agent(self, minio_pod, minio_data_connection, llama_stack_cl
             session_id=s_id,
             stream=False,
         )
-        content = response.output_message.content
+        content = response.output_message.content.lower()
         assert content is not None, "LLM response content is None"
-        assert "answers" in content, "The LLM didn't provide the expected answer to the prompt"
+        assert "answer" in content, "The LLM didn't provide the expected answer to the prompt"
+        assert "translate" in content, "The LLM didn't provide the expected answer to the prompt"
+        assert "summarize" in content, "The LLM didn't provide the expected answer to the prompt"
+        assert "chat" in content, "The LLM didn't provide the expected answer to the prompt"
 
     @pytest.mark.smoke
-    def test_rag_build_rag_agent(self, minio_pod, minio_data_connection, llama_stack_client: LlamaStackClient) -> None:
+    def test_rag_build_rag_agent(self, llama_stack_client: LlamaStackClient) -> None:
         """
         Test full RAG pipeline with vector database integration and knowledge retrieval.