Skip to content

Commit ad95692

Browse files
committed
refactor(tests): update RAG response validation to assert file_search_call output and annotations
1 parent 188ba7d commit ad95692

File tree

1 file changed: +42 -30 lines changed

1 file changed: +42 -30 lines changed

tests/llama_stack/vector_io/upgrade/test_upgrade_vector_store_rag.py

Lines changed: 42 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -3,30 +3,52 @@
33
from llama_stack_client.types.vector_store import VectorStore
44

55
from tests.llama_stack.constants import ModelInfo
6-
from tests.llama_stack.utils import (
7-
create_response_function,
8-
get_torchtune_test_expectations,
9-
validate_api_responses,
10-
)
6+
7+
IBM_EARNINGS_RAG_QUERY = "How did IBM perform financially in the fourth quarter of 2025?"
118

129

1310
def _assert_minimal_rag_response(
1411
unprivileged_llama_stack_client: LlamaStackClient,
1512
llama_stack_models: ModelInfo,
1613
vector_store_with_example_docs: VectorStore,
1714
) -> None:
18-
response_fn = create_response_function(
19-
llama_stack_client=unprivileged_llama_stack_client,
20-
llama_stack_models=llama_stack_models,
21-
vector_store=vector_store_with_example_docs,
15+
response = unprivileged_llama_stack_client.responses.create(
16+
input=IBM_EARNINGS_RAG_QUERY,
17+
model=llama_stack_models.model_id,
18+
instructions="Always use the file_search tool to look up information before answering.",
19+
stream=False,
20+
tools=[
21+
{
22+
"type": "file_search",
23+
"vector_store_ids": [vector_store_with_example_docs.id],
24+
}
25+
],
26+
)
27+
28+
file_search_calls = [item for item in response.output if item.type == "file_search_call"]
29+
assert file_search_calls, (
30+
"Expected file_search_call output item in the response, indicating the model "
31+
f"invoked file_search. Output types: {[item.type for item in response.output]}"
32+
)
33+
34+
file_search_call = file_search_calls[0]
35+
assert file_search_call.status == "completed", (
36+
f"Expected file_search_call status 'completed', got '{file_search_call.status}'"
2237
)
38+
assert file_search_call.results, "file_search_call should contain retrieval results"
2339

24-
test_case = get_torchtune_test_expectations()[0]
25-
answer = response_fn(question=test_case["question"])
40+
annotations = []
41+
for item in response.output:
42+
if item.type != "message" or not isinstance(item.content, list):
43+
continue
44+
for content_item in item.content:
45+
if content_item.annotations:
46+
annotations.extend(content_item.annotations)
2647

27-
assert answer is not None, "RAG response content is None"
28-
assert isinstance(answer, str), "RAG response content should be a string"
29-
assert answer.strip(), "RAG response content is empty"
48+
assert annotations, "Response should contain file_citation annotations when file_search returns results"
49+
assert any(annotation.type == "file_citation" for annotation in annotations), (
50+
"Expected at least one file_citation annotation in response output"
51+
)
3052

3153

3254
@pytest.mark.parametrize(
@@ -59,7 +81,7 @@ def test_vector_store_rag_pre_upgrade(
5981
6082
Given: A running unprivileged LlamaStack distribution with a vector store and uploaded documents.
6183
When: A retrieval-augmented response is requested using file search.
62-
Then: The generated answer is non-empty, confirming baseline RAG behavior before upgrade.
84+
Then: The response includes completed file_search_call output and file_citation annotations.
6385
"""
6486
_assert_minimal_rag_response(
6587
unprivileged_llama_stack_client=unprivileged_llama_stack_client,
@@ -97,21 +119,11 @@ def test_vector_store_rag_post_upgrade(
97119
"""Verify vector-store-backed RAG remains correct after upgrade.
98120
99121
Given: A pre-existing unprivileged LlamaStack distribution after upgrade with reused vector store docs.
100-
When: The RAG response flow is validated across TorchTune expectation turns.
101-
Then: All expectation checks pass, proving retrieval+inference continuity after upgrade.
122+
When: A retrieval-augmented response is requested using file search.
123+
Then: The response includes completed file_search_call output and file_citation annotations.
102124
"""
103-
response_fn = create_response_function(
104-
llama_stack_client=unprivileged_llama_stack_client,
125+
_assert_minimal_rag_response(
126+
unprivileged_llama_stack_client=unprivileged_llama_stack_client,
105127
llama_stack_models=llama_stack_models,
106-
vector_store=vector_store_with_example_docs,
107-
)
108-
109-
turns_with_expectations = get_torchtune_test_expectations()
110-
validation_result = validate_api_responses(
111-
response_fn=response_fn,
112-
test_cases=turns_with_expectations,
113-
)
114-
115-
assert validation_result["success"], (
116-
f"Post-upgrade RAG validation failed. Summary: {validation_result['summary']}"
128+
vector_store_with_example_docs=vector_store_with_example_docs,
117129
)

0 commit comments

Comments
 (0)