@@ -21,7 +21,7 @@ class TestRag:
2121
2222 @pytest .mark .smoke
2323 def test_llama_stack_server (
24- self , llama_stack_distribution_deployment : Deployment , lls_client : LlamaStackClient
24+ self , llama_stack_distribution_deployment : Deployment , rag_lls_client : LlamaStackClient
2525 ) -> None :
2626 """
2727 Test LlamaStack Server deployment and verify required models are available.
@@ -33,7 +33,7 @@ def test_llama_stack_server(
3333 """
3434 llama_stack_distribution_deployment .wait_for_replicas ()
3535
36- models = lls_client .models .list ()
36+ models = rag_lls_client .models .list ()
3737 assert models is not None , "No models returned from LlamaStackClient"
3838
3939 llm_model = next ((m for m in models if m .api_model_type == "llm" ), None )
@@ -50,19 +50,20 @@ def test_llama_stack_server(
5050 assert embedding_dimension is not None , "No embedding_dimension set in embedding model"
5151
5252 @pytest .mark .smoke
53- def test_rag_basic_inference (self , lls_client : LlamaStackClient ) -> None :
53+ def test_rag_basic_inference (self , rag_lls_client : LlamaStackClient ) -> None :
5454 """
5555 Test basic chat completion inference through LlamaStack client.
5656
5757 Validates that the server can perform text generation using the chat completions API
5858 and provides factually correct responses.
5959
60- Based on the example available at https://llama-stack.readthedocs.io/en/latest/getting_started/detailed_tutorial.html#step-4-run-the-demos
60+ Based on the example available at
61+ https://llama-stack.readthedocs.io/en/latest/getting_started/detailed_tutorial.html#step-4-run-the-demos
6162 """
62- models = lls_client .models .list ()
63+ models = rag_lls_client .models .list ()
6364 model_id = next (m for m in models if m .api_model_type == "llm" ).identifier
6465
65- response = lls_client .chat .completions .create (
66+ response = rag_lls_client .chat .completions .create (
6667 model = model_id ,
6768 messages = [
6869 {"role" : "system" , "content" : "You are a helpful assistant." },
@@ -77,18 +78,19 @@ def test_rag_basic_inference(self, lls_client: LlamaStackClient) -> None:
7778 assert "Paris" in content , "The LLM didn't provide the expected answer to the prompt"
7879
7980 @pytest .mark .smoke
80- def test_rag_simple_agent (self , lls_client : LlamaStackClient ) -> None :
81+ def test_rag_simple_agent (self , rag_lls_client : LlamaStackClient ) -> None :
8182 """
8283 Test basic agent creation and conversation capabilities.
8384
8485 Validates agent creation, session management, and turn-based interactions
8586 with both identity and capability questions.
8687
87- Based on the example available at https://llama-stack.readthedocs.io/en/latest/getting_started/detailed_tutorial.html#step-4-run-the-demos
88+ Based on the example available at
89+ https://llama-stack.readthedocs.io/en/latest/getting_started/detailed_tutorial.html#step-4-run-the-demos
8890 """
89- models = lls_client .models .list ()
91+ models = rag_lls_client .models .list ()
9092 model_id = next (m for m in models if m .api_model_type == "llm" ).identifier
91- agent = Agent (lls_client , model = model_id , instructions = "You are a helpful assistant." )
93+ agent = Agent (client = rag_lls_client , model = model_id , instructions = "You are a helpful assistant." )
9294 s_id = agent .create_session (session_name = f"s{ uuid .uuid4 ().hex } " )
9395
9496 # Test identity question
@@ -112,17 +114,18 @@ def test_rag_simple_agent(self, lls_client: LlamaStackClient) -> None:
112114 assert "answers" in content , "The LLM didn't provide the expected answer to the prompt"
113115
114116 @pytest .mark .smoke
115- def test_rag_build_rag_agent (self , lls_client : LlamaStackClient ) -> None :
117+ def test_rag_build_rag_agent (self , rag_lls_client : LlamaStackClient ) -> None :
116118 """
117119 Test full RAG pipeline with vector database integration and knowledge retrieval.
118120
119121 Creates a RAG agent with PyTorch torchtune documentation, tests knowledge queries
120122 about fine-tuning techniques (LoRA, QAT, memory optimizations), and validates
121123 that responses contain expected technical keywords.
122124
123- Based on the example available at https://llama-stack.readthedocs.io/en/latest/getting_started/detailed_tutorial.html#step-4-run-the-demos
125+ Based on the example available at
126+ https://llama-stack.readthedocs.io/en/latest/getting_started/detailed_tutorial.html#step-4-run-the-demos
124127 """
125- models = lls_client .models .list ()
128+ models = rag_lls_client .models .list ()
126129 model_id = next (m for m in models if m .api_model_type == "llm" ).identifier
127130 embedding_model = next (m for m in models if m .api_model_type == "embedding" )
128131
@@ -131,7 +134,7 @@ def test_rag_build_rag_agent(self, lls_client: LlamaStackClient) -> None:
131134 # Create a vector database instance
132135 vector_db_id = f"v{ uuid .uuid4 ().hex } "
133136
134- lls_client .vector_dbs .register (
137+ rag_lls_client .vector_dbs .register (
135138 vector_db_id = vector_db_id ,
136139 embedding_model = embedding_model .identifier ,
137140 embedding_dimension = embedding_dimension ,
@@ -141,7 +144,7 @@ def test_rag_build_rag_agent(self, lls_client: LlamaStackClient) -> None:
141144 try :
142145 # Create the RAG agent connected to the vector database
143146 rag_agent = Agent (
144- lls_client ,
147+ client = rag_lls_client ,
145148 model = model_id ,
146149 instructions = "You are a helpful assistant. Use the RAG tool to answer questions as needed." ,
147150 tools = [
@@ -164,14 +167,14 @@ def test_rag_build_rag_agent(self, lls_client: LlamaStackClient) -> None:
164167 documents = [
165168 RAGDocument (
166169 document_id = f"num-{ i } " ,
167- content = f"https://raw.githubusercontent.com/pytorch/torchtune/refs/tags/v0.6.1/docs/source/tutorials/{ url } " ,
170+ content = f"https://raw.githubusercontent.com/pytorch/torchtune/refs/tags/v0.6.1/docs/source/tutorials/{ url } " , # noqa
168171 mime_type = "text/plain" ,
169172 metadata = {},
170173 )
171174 for i , url in enumerate (urls )
172175 ]
173176
174- lls_client .tool_runtime .rag_tool .insert (
177+ rag_lls_client .tool_runtime .rag_tool .insert (
175178 documents = documents ,
176179 vector_db_id = vector_db_id ,
177180 chunk_size_in_tokens = 512 ,
@@ -243,6 +246,6 @@ def test_rag_build_rag_agent(self, lls_client: LlamaStackClient) -> None:
243246 finally :
244247 # Cleanup: unregister the vector database to prevent resource leaks
245248 try :
246- lls_client .vector_dbs .unregister (vector_db_id )
249+ rag_lls_client .vector_dbs .unregister (vector_db_id )
247250 except Exception as e :
248251 LOGGER .warning (f"Failed to unregister vector database { vector_db_id } : { e } " )
0 commit comments