|
3 | 3 | from llama_stack_client.types.vector_store import VectorStore |
4 | 4 |
|
5 | 5 | from tests.llama_stack.constants import ModelInfo |
6 | | -from tests.llama_stack.utils import ( |
7 | | - create_response_function, |
8 | | - get_torchtune_test_expectations, |
9 | | - validate_api_responses, |
10 | | -) |
| 6 | + |
| 7 | +IBM_EARNINGS_RAG_QUERY = "How did IBM perform financially in the fourth quarter of 2025?" |
11 | 8 |
|
12 | 9 |
|
13 | 10 | def _assert_minimal_rag_response( |
14 | 11 | unprivileged_llama_stack_client: LlamaStackClient, |
15 | 12 | llama_stack_models: ModelInfo, |
16 | 13 | vector_store_with_example_docs: VectorStore, |
17 | 14 | ) -> None: |
18 | | - response_fn = create_response_function( |
19 | | - llama_stack_client=unprivileged_llama_stack_client, |
20 | | - llama_stack_models=llama_stack_models, |
21 | | - vector_store=vector_store_with_example_docs, |
| 15 | + response = unprivileged_llama_stack_client.responses.create( |
| 16 | + input=IBM_EARNINGS_RAG_QUERY, |
| 17 | + model=llama_stack_models.model_id, |
| 18 | + instructions="Always use the file_search tool to look up information before answering.", |
| 19 | + stream=False, |
| 20 | + tools=[ |
| 21 | + { |
| 22 | + "type": "file_search", |
| 23 | + "vector_store_ids": [vector_store_with_example_docs.id], |
| 24 | + } |
| 25 | + ], |
| 26 | + ) |
| 27 | + |
| 28 | + file_search_calls = [item for item in response.output if item.type == "file_search_call"] |
| 29 | + assert file_search_calls, ( |
| 30 | + "Expected file_search_call output item in the response, indicating the model " |
| 31 | + f"invoked file_search. Output types: {[item.type for item in response.output]}" |
| 32 | + ) |
| 33 | + |
| 34 | + file_search_call = file_search_calls[0] |
| 35 | + assert file_search_call.status == "completed", ( |
| 36 | + f"Expected file_search_call status 'completed', got '{file_search_call.status}'" |
22 | 37 | ) |
| 38 | + assert file_search_call.results, "file_search_call should contain retrieval results" |
23 | 39 |
|
24 | | - test_case = get_torchtune_test_expectations()[0] |
25 | | - answer = response_fn(question=test_case["question"]) |
| 40 | + annotations = [] |
| 41 | + for item in response.output: |
| 42 | + if item.type != "message" or not isinstance(item.content, list): |
| 43 | + continue |
| 44 | + for content_item in item.content: |
| 45 | + if content_item.annotations: |
| 46 | + annotations.extend(content_item.annotations) |
26 | 47 |
|
27 | | - assert answer is not None, "RAG response content is None" |
28 | | - assert isinstance(answer, str), "RAG response content should be a string" |
29 | | - assert answer.strip(), "RAG response content is empty" |
| 48 | + assert annotations, "Response should contain file_citation annotations when file_search returns results" |
| 49 | + assert any(annotation.type == "file_citation" for annotation in annotations), ( |
| 50 | + "Expected at least one file_citation annotation in response output" |
| 51 | + ) |
30 | 52 |
|
31 | 53 |
|
32 | 54 | @pytest.mark.parametrize( |
@@ -59,7 +81,7 @@ def test_vector_store_rag_pre_upgrade( |
59 | 81 |
|
60 | 82 | Given: A running unprivileged LlamaStack distribution with a vector store and uploaded documents. |
61 | 83 | When: A retrieval-augmented response is requested using file search. |
62 | | - Then: The generated answer is non-empty, confirming baseline RAG behavior before upgrade. |
| 84 | + Then: The response includes completed file_search_call output and file_citation annotations. |
63 | 85 | """ |
64 | 86 | _assert_minimal_rag_response( |
65 | 87 | unprivileged_llama_stack_client=unprivileged_llama_stack_client, |
@@ -97,21 +119,11 @@ def test_vector_store_rag_post_upgrade( |
97 | 119 | """Verify vector-store-backed RAG remains correct after upgrade. |
98 | 120 |
|
99 | 121 | Given: A pre-existing unprivileged LlamaStack distribution after upgrade with reused vector store docs. |
100 | | - When: The RAG response flow is validated across TorchTune expectation turns. |
101 | | - Then: All expectation checks pass, proving retrieval+inference continuity after upgrade. |
| 122 | + When: A retrieval-augmented response is requested using file search. |
| 123 | + Then: The response includes completed file_search_call output and file_citation annotations. |
102 | 124 | """ |
103 | | - response_fn = create_response_function( |
104 | | - llama_stack_client=unprivileged_llama_stack_client, |
| 125 | + _assert_minimal_rag_response( |
| 126 | + unprivileged_llama_stack_client=unprivileged_llama_stack_client, |
105 | 127 | llama_stack_models=llama_stack_models, |
106 | | - vector_store=vector_store_with_example_docs, |
107 | | - ) |
108 | | - |
109 | | - turns_with_expectations = get_torchtune_test_expectations() |
110 | | - validation_result = validate_api_responses( |
111 | | - response_fn=response_fn, |
112 | | - test_cases=turns_with_expectations, |
113 | | - ) |
114 | | - |
115 | | - assert validation_result["success"], ( |
116 | | - f"Post-upgrade RAG validation failed. Summary: {validation_result['summary']}" |
| 128 | + vector_store_with_example_docs=vector_store_with_example_docs, |
117 | 129 | ) |
0 commit comments