Skip to content

Commit 4250bc2

Browse files
committed
RHAIENG-76 - Automated test that RAG generates valid answers
1 parent 05c976c commit 4250bc2

1 file changed

Lines changed: 150 additions & 0 deletions

File tree

tests/rag/test_rag.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,3 +249,153 @@ def test_rag_build_rag_agent(self, rag_lls_client: LlamaStackClient) -> None:
249249
rag_lls_client.vector_dbs.unregister(vector_db_id)
250250
except Exception as e:
251251
LOGGER.warning(f"Failed to unregister vector database {vector_db_id}: {e}")
252+
253+
def test_rag_pdf(self, rag_lls_client: LlamaStackClient) -> None:
    """
    Test RAG functionality with PDF documents.

    Registers a fresh vector database, creates a RAG agent bound to it, inserts the
    Docling PDF (referenced by URL), asks a series of knowledge questions about
    Docling features, AI models, output formats, and capabilities, and validates
    that responses contain expected technical keywords.

    Args:
        rag_lls_client: Llama Stack client used to list models, register the
            vector database, insert documents and drive the agent.

    Raises:
        AssertionError: If no LLM or embedding model is available, if the overall
            validation result is not successful, or if any individual response is
            empty or contains none of its expected keywords.
    """
    model_id = None
    embedding_model = None

    # Pick the first LLM model (for model_id) and the first embedding model
    # (for embedding_model). Stop searching once both are found.
    for m in rag_lls_client.models.list():
        if m.api_model_type == "llm" and model_id is None:
            model_id = m.identifier
        if m.api_model_type == "embedding" and embedding_model is None:
            embedding_model = m
        if model_id is not None and embedding_model is not None:
            break

    # Fail early with a readable message instead of an AttributeError on None
    # (or an Agent created with model=None) when the server lacks a required model.
    assert model_id is not None, "No LLM model is available from the Llama Stack client; cannot run RAG PDF test"
    assert embedding_model is not None, (
        "No embedding model is available from the Llama Stack client; cannot run RAG PDF test"
    )

    embedding_dimension = embedding_model.metadata["embedding_dimension"]

    # Create a vector database instance with a unique id so runs do not collide
    vector_db_id = f"v{uuid.uuid4().hex}"

    rag_lls_client.vector_dbs.register(
        vector_db_id=vector_db_id,
        embedding_model=embedding_model.identifier,
        embedding_dimension=embedding_dimension,
        provider_id="milvus",
    )

    try:
        # Create the RAG agent connected to the vector database
        rag_agent = Agent(
            client=rag_lls_client,
            model=model_id,
            instructions="You are a helpful assistant. Use the RAG tool to answer questions as needed.",
            tools=[
                {
                    "name": "builtin::rag/knowledge_search",
                    "args": {"vector_db_ids": [vector_db_id]},
                }
            ],
        )
        session_id = rag_agent.create_session(session_name=f"s{uuid.uuid4().hex}")

        # Insert PDF documents about Docling
        pdf_files_urls = [
            "https://arxiv.org/pdf/2408.09869",
        ]
        documents = [
            RAGDocument(
                document_id=f"num-{i}",
                content=file_url,
                mime_type="application/pdf",
                metadata={},
            )
            for i, file_url in enumerate(pdf_files_urls)
        ]

        rag_lls_client.tool_runtime.rag_tool.insert(
            documents=documents,
            vector_db_id=vector_db_id,
            chunk_size_in_tokens=512,
        )

        turns_with_expectations: List[TurnExpectation] = [
            {
                "question": "What is Docling?",
                "expected_keywords": ["PDF", "conversion", "open-source", "MIT"],
                "description": "Should provide information about Docling framework",
            },
            {
                "question": "What AI models power Docling?",
                "expected_keywords": ["DocLayNet", "TableFormer", "layout", "analysis", "table", "structure"],
                "description": "Should provide information about Docling's AI models",
            },
            {
                "question": "What output formats does Docling support for converted PDF documents?",
                "expected_keywords": ["JSON", "Markdown"],
                "description": "Should provide information about Docling's output formats",
            },
            {
                "question": "Where can users find documentation and examples for Docling?",
                "expected_keywords": ["GitHub", "repository", "documentation", "examples", "DS4SD"],
                "description": "Should provide information about Docling documentation location",
            },
            {
                "question": "What is the processing pipeline of Docling?",
                "expected_keywords": ["PDF", "backend", "AI", "models", "post-processing"],
                "description": "Should provide information about Docling's processing pipeline",
            },
            {
                "question": "What are the two PDF backend choices available in Docling?",
                "expected_keywords": ["qpdf", "pypdfium", "docling-parse"],
                "description": "Should provide information about Docling's PDF backends",
            },
            {
                "question": "What is TableFormer?",
                "expected_keywords": ["vision-transformer", "table", "structure", "row", "column"],
                "description": "Should provide information about TableFormer model",
            },
            {
                "question": "What OCR library does Docling use in its initial release?",
                "expected_keywords": ["EasyOCR"],
                "description": "Should provide information about Docling's OCR library",
            },
            {
                "question": "How can users extend Docling's capabilities?",
                "expected_keywords": ["BaseModelPipeline", "sub-classing"],
                "description": "Should provide information about extending Docling",
            },
            {
                "question": "What are some of the downstream applications for Docling's output?",
                "expected_keywords": ["search", "retrieval", "RAG", "classification", "knowledge", "extraction"],
                "description": "Should provide information about Docling's applications",
            },
        ]

        # Ask the agent about the inserted documents and validate responses
        validation_result = validate_rag_agent_responses(
            rag_agent=rag_agent,
            session_id=session_id,
            turns_with_expectations=turns_with_expectations,
            stream=True,
            verbose=True,
            min_keywords_required=1,
            print_events=False,
        )

        # Assert that validation was successful
        assert validation_result["success"], f"RAG PDF agent validation failed. Summary: {validation_result['summary']}"

        # Additional assertions for specific requirements
        for result in validation_result["results"]:
            assert result["response_length"] > 0, f"No response content for question: {result['question']}"
            assert len(result["found_keywords"]) > 0, (
                f"No expected keywords found in response for: {result['question']}"
            )

    finally:
        # Cleanup: unregister the vector database to prevent resource leaks.
        # Best-effort: a cleanup failure is logged rather than masking the test outcome.
        try:
            rag_lls_client.vector_dbs.unregister(vector_db_id)
        except Exception as e:
            LOGGER.warning(f"Failed to unregister vector database {vector_db_id}: {e}")

0 commit comments

Comments
 (0)