opendatahub-io
diff --git a/pyproject.toml
Lines changed: 1 addition & 1 deletion b/pyproject.toml
Lines changed: 1 addition & 1 deletion
diff --git a/tests/llama_stack/agents/test_agents.py
Lines changed: 40 additions & 81 deletions b/tests/llama_stack/agents/test_agents.py
Lines changed: 40 additions & 81 deletions
diff --git a/tests/llama_stack/conftest.py
Lines changed: 44 additions & 3 deletions b/tests/llama_stack/conftest.py
Lines changed: 44 additions & 3 deletions
diff --git a/tests/llama_stack/inference/test_inference.py
Lines changed: 24 additions & 9 deletions b/tests/llama_stack/inference/test_inference.py
Lines changed: 24 additions & 9 deletions
diff --git a/tests/llama_stack/vector_io/test_vector_io_deprecated.py
Lines changed: 0 additions & 98 deletions b/tests/llama_stack/vector_io/test_vector_io_deprecated.py
Lines changed: 0 additions & 98 deletions
@@ -69,7 +69,7 @@ dependencies = [
     "marshmallow==3.26.1,<4", # this version is needed for pytest-jira
     "pytest-html>=4.1.1",
     "fire",
-    "llama_stack_client==0.2.23",
+    "llama_stack_client>=0.3.0,<0.4",
     "pytest-xdist==3.8.0",
     "dictdiffer>=0.9.0",
 ]
 
@@ -1,6 +1,7 @@
 import uuid
 import pytest
-from llama_stack_client import Agent, LlamaStackClient, RAGDocument
+from llama_stack_client import Agent, LlamaStackClient
+from llama_stack_client.types.vector_store import VectorStore
 from simple_logger.logger import get_logger
 from tests.llama_stack.constants import ModelInfo
 from tests.llama_stack.utils import get_torchtune_test_expectations, validate_rag_agent_responses
@@ -56,7 +57,7 @@ def test_agents_simple_agent(
             session_id=s_id,
             stream=False,
         )
-        content = response.output_message.content
+        content = response.output_text
         text = str(content or "")
         assert text, "LLM response content is empty"
         assert "model" in text.lower(), "The LLM didn't provide the expected answer to the prompt"
@@ -67,7 +68,7 @@ def test_agents_simple_agent(
             session_id=s_id,
             stream=False,
         )
-        content = response.output_message.content
+        content = response.output_text
         text = str(content or "")
         assert text, "LLM response content is empty"
         assert "answer" in text.lower(), "The LLM didn't provide the expected answer to the prompt"
@@ -77,6 +78,7 @@ def test_agents_rag_agent(
         self,
         unprivileged_llama_stack_client: LlamaStackClient,
         llama_stack_models: ModelInfo,
+        vector_store_with_example_docs: VectorStore,
     ) -> None:
         """
         Test RAG agent that can answer questions about the Torchtune project using the documents
@@ -92,83 +94,40 @@ def test_agents_rag_agent(
         # TODO: update this example to use the vector_store API
         """
 
-        vector_db_id: str | None = None
-        try:
-            vector_db = f"my-test-vector_db-{uuid.uuid4().hex}"
-            res = unprivileged_llama_stack_client.vector_dbs.register(
-                vector_db_id=vector_db,
-                embedding_model=llama_stack_models.embedding_model.identifier,
-                embedding_dimension=llama_stack_models.embedding_dimension,
-                provider_id="milvus",
-            )
-            vector_db_id = res.identifier
-
-            # Create the RAG agent connected to the vector database
-            rag_agent = Agent(
-                client=unprivileged_llama_stack_client,
-                model=llama_stack_models.model_id,
-                instructions="You are a helpful assistant. Use the RAG tool to answer questions as needed.",
-                tools=[
-                    {
-                        "name": "builtin::rag/knowledge_search",
-                        "args": {"vector_db_ids": [vector_db_id]},
-                    }
-                ],
-            )
-            session_id = rag_agent.create_session(session_name=f"s{uuid.uuid4().hex}")
-
-            # Insert into the vector database example documents about torchtune
-            urls = [
-                "llama3.rst",
-                "chat.rst",
-                "lora_finetune.rst",
-                "qat_finetune.rst",
-                "memory_optimizations.rst",
-            ]
-            documents = [
-                RAGDocument(
-                    document_id=f"num-{index}",
-                    content=f"https://raw.githubusercontent.com/pytorch/torchtune/refs/tags/v0.6.1/docs/source/tutorials/{url}",  # noqa
-                    mime_type="text/plain",
-                    metadata={},
-                )
-                for index, url in enumerate(urls)
-            ]
-
-            unprivileged_llama_stack_client.tool_runtime.rag_tool.insert(
-                documents=documents,
-                vector_db_id=vector_db_id,
-                chunk_size_in_tokens=512,
-            )
+        # Create the RAG agent connected to the vector database
+        rag_agent = Agent(
+            client=unprivileged_llama_stack_client,
+            model=llama_stack_models.model_id,
+            instructions="You are a helpful assistant. Use the available tools to answer questions as needed.",
+            tools=[
+                {
+                    "type": "file_search",
+                    "vector_store_ids": [vector_store_with_example_docs.id],
+                }
+            ],
+        )
+        session_id = rag_agent.create_session(session_name=f"s{uuid.uuid4().hex}")
+
+        turns_with_expectations = get_torchtune_test_expectations()
+
+        # Ask the agent about the inserted documents and validate responses
+        validation_result = validate_rag_agent_responses(
+            rag_agent=rag_agent,
+            session_id=session_id,
+            turns_with_expectations=turns_with_expectations,
+            stream=True,
+            verbose=True,
+            min_keywords_required=1,
+            print_events=False,
+        )
 
-            turns_with_expectations = get_torchtune_test_expectations()
-
-            # Ask the agent about the inserted documents and validate responses
-            validation_result = validate_rag_agent_responses(
-                rag_agent=rag_agent,
-                session_id=session_id,
-                turns_with_expectations=turns_with_expectations,
-                stream=True,
-                verbose=True,
-                min_keywords_required=1,
-                print_events=False,
-            )
+        # Assert that validation was successful
+        assert validation_result["success"], f"RAG agent validation failed. Summary: {validation_result['summary']}"
 
-            # Assert that validation was successful
-            assert validation_result["success"], f"RAG agent validation failed. Summary: {validation_result['summary']}"
-
-            # Additional assertions for specific requirements
-            for result in validation_result["results"]:
-                assert result["event_count"] > 0, f"No events generated for question: {result['question']}"
-                assert result["response_length"] > 0, f"No response content for question: {result['question']}"
-                assert len(result["found_keywords"]) > 0, (
-                    f"No expected keywords found in response for: {result['question']}"
-                )
-
-        finally:
-            # Cleanup: unregister the vector database to prevent resource leaks
-            if vector_db_id:
-                try:
-                    unprivileged_llama_stack_client.vector_dbs.unregister(vector_db_id)
-                except Exception as exc:
-                    LOGGER.warning("Failed to unregister vector database %s: %s", vector_db_id, exc)
+        # Additional assertions for specific requirements
+        for result in validation_result["results"]:
+            assert result["event_count"] > 0, f"No events generated for question: {result['question']}"
+            assert result["response_length"] > 0, f"No response content for question: {result['question']}"
+            assert len(result["found_keywords"]) > 0, (
+                f"No expected keywords found in response for: {result['question']}"
+            )
@@ -16,6 +16,7 @@
 from tests.llama_stack.utils import (
     create_llama_stack_distribution,
     wait_for_llama_stack_client_ready,
+    vector_store_create_file_from_url,
 )
 from utilities.constants import DscComponents, Timeout
 from utilities.data_science_cluster_utils import update_components_in_dsc
@@ -378,9 +379,11 @@ def vector_store(
 
     vector_store = unprivileged_llama_stack_client.vector_stores.create(
         name="test_vector_store",
-        embedding_model=llama_stack_models.embedding_model.identifier,
-        embedding_dimension=llama_stack_models.embedding_dimension,
-        provider_id=vector_io_provider,
+        extra_body={
+            "embedding_model": llama_stack_models.embedding_model.identifier,
+            "embedding_dimension": llama_stack_models.embedding_dimension,
+            "provider_id": vector_io_provider,
+        },
     )
     LOGGER.info(f"vector_store successfully created (provider_id={vector_io_provider}, id={vector_store.id})")
 
@@ -391,3 +394,41 @@ def vector_store(
         LOGGER.info(f"Deleted vector store {vector_store.id}")
     except Exception as e:
         LOGGER.warning(f"Failed to delete vector store {vector_store.id}: {e}")
+
+
+@pytest.fixture(scope="class")
+def vector_store_with_example_docs(
+    unprivileged_llama_stack_client: LlamaStackClient, vector_store: VectorStore
+) -> Generator[VectorStore, None, None]:
+    """
+    Yields the vector_store fixture's store after adding TorchTune documentation files.
+
+    For each tutorial file name, the URL is built from base_url and handed to
+    vector_store_create_file_from_url together with the client and the store.
+    This fixture has no teardown of its own: nothing runs after the yield.
+
+    Args:
+        unprivileged_llama_stack_client: The configured LlamaStackClient
+        vector_store: The vector store fixture to upload files to
+
+    Yields:
+        The same vector store object that was passed in, after the files were added
+    """
+    # File names only (despite the variable name); each is appended to base_url below
+    urls = [
+        "llama3.rst",
+        "chat.rst",
+        "lora_finetune.rst",
+        "qat_finetune.rst",
+        "memory_optimizations.rst",
+    ]
+
+    base_url = "https://raw.githubusercontent.com/pytorch/torchtune/refs/tags/v0.6.1/docs/source/tutorials/"
+
+    for file_name in urls:
+        url = f"{base_url}{file_name}"
+        vector_store_create_file_from_url(
+            url=url, llama_stack_client=unprivileged_llama_stack_client, vector_store=vector_store
+        )
+
+    yield vector_store
@@ -1,6 +1,6 @@
 import pytest
 from llama_stack_client import LlamaStackClient
-from llama_stack_client.types import EmbeddingsResponse
+from llama_stack_client.types import CreateEmbeddingsResponse
 from tests.llama_stack.constants import ModelInfo
 
 
@@ -73,12 +73,27 @@ def test_inference_embeddings(
         Validates that the server can generate properly formatted embedding vectors
         for text input with correct dimensions as specified in model metadata.
         """
-        embeddings_response = unprivileged_llama_stack_client.inference.embeddings(
-            model_id=llama_stack_models.embedding_model.identifier,
-            contents=["First chunk of text"],
-            output_dimension=llama_stack_models.embedding_dimension,
+
+        embeddings_response = unprivileged_llama_stack_client.embeddings.create(
+            model=llama_stack_models.embedding_model.identifier,
+            input="The food was delicious and the waiter...",
+            encoding_format="float",
         )
-        assert isinstance(embeddings_response, EmbeddingsResponse)
-        assert len(embeddings_response.embeddings) == 1
-        assert isinstance(embeddings_response.embeddings[0], list)
-        assert isinstance(embeddings_response.embeddings[0][0], float)
+
+        assert isinstance(embeddings_response, CreateEmbeddingsResponse)
+        assert len(embeddings_response.data) == 1
+        assert isinstance(embeddings_response.data[0].embedding, list)
+        assert llama_stack_models.embedding_dimension == len(embeddings_response.data[0].embedding)
+        assert isinstance(embeddings_response.data[0].embedding[0], float)
+
+        input_list = ["Input text 1", "Input text 1", "Input text 1"]
+        embeddings_response = unprivileged_llama_stack_client.embeddings.create(
+            model=llama_stack_models.embedding_model.identifier, input=input_list, encoding_format="float"
+        )
+
+        assert isinstance(embeddings_response, CreateEmbeddingsResponse)
+        assert len(embeddings_response.data) == len(input_list)
+        for item in range(len(input_list)):
+            assert isinstance(embeddings_response.data[item].embedding, list)
+            assert llama_stack_models.embedding_dimension == len(embeddings_response.data[item].embedding)
+            assert isinstance(embeddings_response.data[item].embedding[0], float)
Original file line number	Diff line number	Diff line change
`@@ -69,7 +69,7 @@ dependencies = [`
`69`	`69`	`"marshmallow==3.26.1,<4", # this version is needed for pytest-jira`
`70`	`70`	`"pytest-html>=4.1.1",`
`71`	`71`	`"fire",`
`72`		`- "llama_stack_client==0.2.23",`
	`72`	`+ "llama_stack_client>=0.3.0,<0.4",`
`73`	`73`	`"pytest-xdist==3.8.0",`
`74`	`74`	`"dictdiffer>=0.9.0",`
`75`	`75`	`]`